diff --git a/.gitignore b/.gitignore index f9082380e..5e1dbc6eb 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,4 @@ pufferlib/ocean/impulse_wars/*-release/ pufferlib/ocean/impulse_wars/debug-*/ pufferlib/ocean/impulse_wars/release-*/ pufferlib/ocean/impulse_wars/benchmark/ +pufferlib/ocean/dogfight/dogfight_test diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index 6073c651e..6d62c5bd8 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -28,16 +28,16 @@ device = cuda optimizer = muon anneal_lr = True precision = float32 -total_timesteps = 10_000_000 +total_timesteps = 100_000_000 learning_rate = 0.015 gamma = 0.995 -gae_lambda = 0.90 +gae_lambda = 0.95 update_epochs = 1 clip_coef = 0.2 vf_coef = 2.0 vf_clip_coef = 0.2 max_grad_norm = 1.5 -ent_coef = 0.001 +ent_coef = 0.01 adam_beta1 = 0.95 adam_beta2 = 0.999 adam_eps = 1e-12 @@ -45,7 +45,7 @@ adam_eps = 1e-12 data_dir = experiments checkpoint_interval = 200 batch_size = auto -minibatch_size = 8192 +minibatch_size = 16384 # Accumulate gradients above this size max_minibatch_size = 32768 @@ -58,7 +58,7 @@ vtrace_rho_clip = 1.0 vtrace_c_clip = 1.0 prio_alpha = 0.8 -prio_beta0 = 0.2 +prio_beta0 = 0.5 [sweep] method = Protein @@ -83,16 +83,9 @@ max = 1e10 mean = 2e8 scale = time -[sweep.train.bptt_horizon] -distribution = uniform_pow2 -min = 16 -max = 64 -mean = 64 -scale = auto - [sweep.train.minibatch_size] distribution = uniform_pow2 -min = 8192 +min = 16384 max = 65536 mean = 32768 scale = auto diff --git a/pufferlib/config/ocean/dogfight.ini b/pufferlib/config/ocean/dogfight.ini new file mode 100644 index 000000000..698baf084 --- /dev/null +++ b/pufferlib/config/ocean/dogfight.ini @@ -0,0 +1,206 @@ +[base] +env_name = puffer_dogfight +package = ocean +policy_name = Policy +rnn_name = Recurrent + +[policy] +hidden_size = 128 + +[rnn] +input_size = 128 +hidden_size = 128 + +[vec] +num_envs = 8 + +[env] +alt_max = 2500.0 +curriculum_enabled = 1 +curriculum_randomize = 0 +episodes_per_stage = 25 +max_steps = 3000 +num_envs = 1024 +obs_scheme = 4 +penalty_aileron = 0.004787828722037375 +penalty_bias = 0.019452434551902115 +penalty_neg_g = 0.038022669870406395 +penalty_roll = 0.0019647147422656415 +penalty_rudder = 0.00015276678362861277 +penalty_stall = 0.0007385806553065777 +reward_approach = 0.0065743024460971355 +reward_closing_scale = 0.0011914868978783488 +reward_firing_solution = 0.045721526537090544 +reward_level = 0.025920397927984597 +reward_tail_scale = 0.0009967820532619954 +reward_tracking = 0.031819639401510356 +speed_min = 50.0 +aim_cone_start = 0.3856851533800364 +aim_cone_end = 0.05015228554606438 +aim_anneal_episodes = 500 + +[train] +adam_beta1 = 0.9723082880428708 +adam_beta2 = 0.9912225347178505 +adam_eps = 8.183951125996682e-13 +batch_size = auto +bptt_horizon = 64 +checkpoint_interval = 200 +clip_coef = 0.983341504810378 +ent_coef = 0.03071064008271062 +gae_lambda = 0.9949418302404375 +gamma = 0.9855692943246729 +learning_rate = 0.0003102693135543651 +max_grad_norm = 1.955089159309864 +max_minibatch_size = 32768 +minibatch_size = 65536 +prio_alpha = 0.9022484586887103 +prio_beta0 = 0.8983571008600393 +seed = 42 +total_timesteps = 100_000_000 +update_epochs = 4 +vf_clip_coef = 0.4664481597021223 +vf_coef = 1.3376509584486485 +vtrace_c_clip = 0.4391395812854171 +vtrace_rho_clip = 4.6142582874745 + +[sweep] +downsample = 1 +goal = maximize +method = Protein +metric = ultimate +prune_pareto = True +use_gpu = True + +[sweep.env.obs_scheme] +distribution = int_uniform 
+max = 5 +mean = 0 +min = 0 +scale = 1.0 + +[sweep.env.episodes_per_stage] +distribution = int_uniform +min = 20 +max = 75 +mean = 25 +scale = 1.0 + +[sweep.env.penalty_stall] +distribution = uniform +max = 0.005 +mean = 0.0016092406492793122 +min = 0.0 +scale = auto + +[sweep.env.penalty_roll] +distribution = uniform +max = 0.003 +mean = 0.0021072644960864573 +min = 0.0 +scale = auto + +[sweep.env.penalty_neg_g] +distribution = uniform +max = 0.1 +mean = 0.05 +min = 0.01 +scale = auto + +[sweep.env.penalty_rudder] +distribution = uniform +max = 0.001 +mean = 0.0002985792260932028 +min = 0.0001 +scale = auto + +[sweep.env.penalty_aileron] +distribution = uniform +max = 0.005 +mean = 0.002 +min = 0.0 +scale = auto + +[sweep.env.penalty_bias] +distribution = uniform +max = 0.02 +mean = 0.008614029763839244 +min = 0.001 +scale = auto + +[sweep.env.reward_approach] +distribution = uniform +max = 0.02 +mean = 0.003836667464147351 +min = 0.0 +scale = auto + +[sweep.env.reward_level] +distribution = uniform +max = 0.05 +mean = 0.029797846539013125 +min = 0.0 +scale = auto + +[sweep.env.reward_closing_scale] +distribution = uniform +max = 0.005 +mean = 0.005 +min = 0.0 +scale = auto + +[sweep.env.reward_firing_solution] +distribution = uniform +max = 0.1 +mean = 0.01 +min = 0.0 +scale = auto + +[sweep.env.reward_tail_scale] +distribution = uniform +max = 0.01 +mean = 0.005 +min = 0.0 +scale = auto + +[sweep.env.reward_tracking] +distribution = uniform +max = 0.05 +mean = 0.005177132307187232 +min = 0.0 +scale = auto + +[sweep.env.aim_cone_start] +distribution = uniform +max = 0.52 +mean = 0.35 +min = 0.17 +scale = auto + +[sweep.env.aim_cone_end] +distribution = uniform +max = 0.17 +mean = 0.087 +min = 0.05 +scale = auto + +[sweep.env.aim_anneal_episodes] +distribution = int_uniform +min = 50 +max = 5000 +mean = 500 +scale = 1.0 + +[sweep.train.learning_rate] +distribution = log_normal +max = 0.0005 +mean = 9.0e-06 +min = 0.000000001 +scale = 0.5 + +[sweep.train.total_timesteps] +distribution = log_normal +max = 1.01e8 +mean = 1.005e8 +min = 1.0e8 +scale = time diff --git a/pufferlib/environments/mani_skill/torch.py b/pufferlib/environments/mani_skill/torch.py index abb8eaa18..c2e5a795d 100644 --- a/pufferlib/environments/mani_skill/torch.py +++ b/pufferlib/environments/mani_skill/torch.py @@ -64,7 +64,7 @@ def decode_actions(self, hidden): '''Decodes a batch of hidden states into (multi)discrete actions. Assumes no time dimension (handled by LSTM wrappers).''' mean = self.decoder_mean(hidden) - logstd = self.decoder_logstd.expand_as(mean) + logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2) std = torch.exp(logstd) logits = torch.distributions.Normal(mean, std) values = self.value(hidden) diff --git a/pufferlib/models.py b/pufferlib/models.py index fa43d7071..d81198343 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -88,7 +88,7 @@ def decode_actions(self, hidden): logits = self.decoder(hidden).split(self.action_nvec, dim=1) elif self.is_continuous: mean = self.decoder_mean(hidden) - logstd = self.decoder_logstd.expand_as(mean) + logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2) std = torch.exp(logstd) logits = torch.distributions.Normal(mean, std) else: diff --git a/pufferlib/ocean/ENV_GUIDE.md b/pufferlib/ocean/ENV_GUIDE.md new file mode 100644 index 000000000..8e7fb1bd8 --- /dev/null +++ b/pufferlib/ocean/ENV_GUIDE.md @@ -0,0 +1,284 @@ +# PufferLib Ocean Environment Guide + +Quick reference for implementing C-based RL environments in PufferLib. 
+ +## File Structure + +``` +pufferlib/ocean/{env_name}/ +├── {env_name}.h # C implementation (header-only) +├── binding.c # Python-C glue (~20 lines) +└── {env_name}.py # Python wrapper + +pufferlib/config/ocean/{env_name}.ini # Training config +``` + +Build: `python setup.py build_ext --inplace --force` + +## 1. C Header (`{env_name}.h`) + +### Required Structs + +```c +// Log: ONLY floats, last field must be `n` +typedef struct Log { + float episode_return; + float episode_length; + float score; + float perf; // 0-1 normalized metric + // ... custom metrics ... + float n; // REQUIRED last: episode count +} Log; + +// Main env struct +typedef struct EnvName { + float* observations; // or char* for discrete obs + float* actions; // ALWAYS float* (even discrete) + float* rewards; + unsigned char* terminals; + Log log; + Client* client; // raylib, NULL until render + // ... env state ... +} EnvName; +``` + +### Required Functions + +| Function | Purpose | +|----------|---------| +| `init(Env*)` | Allocate internal buffers | +| `c_reset(Env*)` | Reset episode state | +| `c_step(Env*)` | Advance simulation | +| `c_render(Env*)` | Raylib rendering | +| `c_close(Env*)` | Free memory | +| `compute_observations(Env*)` | Fill obs buffer | +| `add_log(Env*, ...)` | Accumulate stats | + +### Step Pattern + +```c +void c_step(Env* env) { + env->tick++; + env->rewards[0] = 0; + env->terminals[0] = 0; + + // ... physics/game logic ... + + if (terminal_condition) { + env->terminals[0] = 1; + add_log(env, ...); + c_reset(env); + return; + } + compute_observations(env); +} +``` + +### Logging Pattern + +```c +void add_log(Env* env) { + env->log.episode_return += env->episodic_return; + env->log.episode_length += env->tick; + env->log.score += env->score; + env->log.n += 1.0f; // increment episode count +} +``` + +## 2. Binding (`binding.c`) + +```c +#include "{env_name}.h" + +#define Env EnvName +#include "../env_binding.h" + +static int my_init(Env* env, PyObject* args, PyObject* kwargs) { + env->param1 = unpack(kwargs, "param1"); + env->param2 = unpack(kwargs, "param2"); + init(env); + return 0; +} + +static int my_log(PyObject* dict, Log* log) { + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "perf", log->perf); + return 0; +} +``` + +## 3. 
Python Wrapper (`{env_name}.py`) + +```python +import numpy as np +import gymnasium +import pufferlib +from pufferlib.ocean.{env_name} import binding + +class EnvName(pufferlib.PufferEnv): + def __init__(self, num_envs=16, render_mode=None, buf=None, + param1=100, param2=0.5, **kwargs): + + self.single_observation_space = gymnasium.spaces.Box( + low=-1, high=1, shape=(OBS_SIZE,), dtype=np.float32 + ) + # Continuous: Box Discrete: Discrete(n) + self.single_action_space = gymnasium.spaces.Box( + low=-1, high=1, shape=(ACT_SIZE,), dtype=np.float32 + ) + + self.num_agents = num_envs + self.render_mode = render_mode + super().__init__(buf) + + # CRITICAL for continuous actions: + self.actions = self.actions.astype(np.float32) + + c_envs = [] + for i in range(num_envs): + c_envs.append(binding.env_init( + self.observations[i:i+1], + self.actions[i:i+1], + self.rewards[i:i+1], + self.terminals[i:i+1], + self.truncations[i:i+1], + i, # seed + param1=param1, + param2=param2, + )) + self.c_envs = binding.vectorize(*c_envs) + + def reset(self, seed=None): + self.tick = 0 + binding.vec_reset(self.c_envs, seed or 0) + return self.observations, [] + + def step(self, actions): + self.actions[:] = actions + self.tick += 1 + binding.vec_step(self.c_envs) + + info = [] + log = binding.vec_log(self.c_envs) + if log: + info.append(log) + return (self.observations, self.rewards, + self.terminals, self.truncations, info) + + def render(self): + binding.vec_render(self.c_envs, 0) + + def close(self): + binding.vec_close(self.c_envs) +``` + +## 4. Config (`pufferlib/config/ocean/{env_name}.ini`) + +```ini +[base] +package = ocean +env_name = puffer_{env_name} + +[vec] +num_envs = 8 + +[env] +num_envs = 1024 +param1 = 100 +param2 = 0.5 + +[train] +total_timesteps = 100_000_000 +learning_rate = 0.0003 +gamma = 0.99 +# ... PPO hyperparams ... +``` + +## Reference Environments + +| Env | Use For | +|-----|---------| +| `drone_race/` | Continuous actions, quaternions, RK4 physics | +| `drone_swarm/` | Multi-agent continuous | +| `snake/` | Multi-agent discrete, grid world | +| `target/` | Simple tutorial, well-commented | +| `impulse_wars/` | Box2D physics integration | + +## Common Patterns + +### Vector/Quaternion Math (from dronelib.h) + +```c +typedef struct { float x, y, z; } Vec3; +typedef struct { float w, x, y, z; } Quat; + +Vec3 add3(Vec3 a, Vec3 b); +Vec3 sub3(Vec3 a, Vec3 b); +Vec3 scalmul3(Vec3 a, float s); +float dot3(Vec3 a, Vec3 b); +float norm3(Vec3 a); + +Quat quat_mul(Quat a, Quat b); +void quat_normalize(Quat* q); +Vec3 quat_rotate(Quat q, Vec3 v); +Quat quat_inverse(Quat q); +``` + +### Observation Normalization + +```c +// Normalize to roughly [-1, 1] +env->observations[0] = position.x / MAX_X; +env->observations[1] = velocity.x / MAX_VEL; +env->observations[2] = quat.w; // already [-1, 1] +``` + +### Action Handling + +```c +// Continuous: actions already in [-1, 1] +float throttle = (env->actions[0] + 1.0f) * 0.5f; // remap to [0, 1] +float elevator = env->actions[1]; // keep [-1, 1] + +// Discrete trigger +bool fire = env->actions[4] > 0.5f; +``` + +### Raylib Rendering + +```c +void c_render(Env* env) { + if (env->client == NULL) { + InitWindow(WIDTH, HEIGHT, "Env Name"); + SetTargetFPS(60); + env->client = calloc(1, sizeof(Client)); + } + + if (IsKeyDown(KEY_ESCAPE)) exit(0); + + BeginDrawing(); + ClearBackground((Color){6, 24, 24, 255}); + // ... draw stuff ... + EndDrawing(); +} +``` + +## Performance Tips + +1. **No allocations after init** - malloc only in `init()` +2. 
**Pass structs by pointer** - avoid copies +3. **Inline small functions** - `static inline` +4. **Batch operations** - process all agents in tight loops +5. **Avoid divisions** - precompute `1/x` where possible + +## Checklist for New Env + +- [ ] Create folder `pufferlib/ocean/{name}/` +- [ ] Implement `{name}.h` with all required functions +- [ ] Create `binding.c` with `my_init()` and `my_log()` +- [ ] Create `{name}.py` Python wrapper +- [ ] Create `pufferlib/config/ocean/{name}.ini` +- [ ] Build: `python setup.py build_ext --inplace --force` +- [ ] Test: `from pufferlib.ocean.{name} import EnvName` diff --git a/pufferlib/ocean/dogfight/AIRCRAFT_PERFORMANCE_RL_GUIDE.md b/pufferlib/ocean/dogfight/AIRCRAFT_PERFORMANCE_RL_GUIDE.md new file mode 100644 index 000000000..588e337ab --- /dev/null +++ b/pufferlib/ocean/dogfight/AIRCRAFT_PERFORMANCE_RL_GUIDE.md @@ -0,0 +1,1073 @@ +# Aircraft Performance Approximation for High-Performance RL Environments + +## A Comprehensive Guide for WW2 Dogfighting Simulation + +**Purpose**: This document provides the mathematical foundations, equations, and implementation strategies for approximating aircraft performance in a headless reinforcement learning training environment. The goal is to achieve very high steps-per-second (SPS) while maintaining physically plausible flight dynamics suitable for WW2 dogfighting scenarios. + +**Target Audience**: Claude agents developing a WW2 dogfighting RL environment using PufferLib or similar frameworks. + +--- + +## Table of Contents + +1. [Philosophy: Fidelity vs. Performance Trade-offs](#1-philosophy-fidelity-vs-performance-trade-offs) +2. [Coordinate Systems and Reference Frames](#2-coordinate-systems-and-reference-frames) +3. [Equations of Motion](#3-equations-of-motion) +4. [Aerodynamic Force Models](#4-aerodynamic-force-models) +5. [The Drag Polar](#5-the-drag-polar) +6. [Lift Coefficient Modeling](#6-lift-coefficient-modeling) +7. [Propulsion: Piston Engine and Propeller Models](#7-propulsion-piston-engine-and-propeller-models) +8. [Atmospheric Model](#8-atmospheric-model) +9. [Performance Calculations](#9-performance-calculations) +10. [Implementation Strategies for High SPS](#10-implementation-strategies-for-high-sps) +11. [WW2 Aircraft Reference Data](#11-ww2-aircraft-reference-data) +12. [Validation and Sanity Checks](#12-validation-and-sanity-checks) +13. [Sources and References](#13-sources-and-references) + +--- + +## 1. Philosophy: Fidelity vs. Performance Trade-offs + +### The Core Challenge + +Full 6-DOF flight dynamics models (like JSBSim or Stevens & Lewis F-16 models) are computationally expensive. For RL training requiring millions of environment steps, we need simplifications that: + +1. **Preserve emergent behavior**: Aircraft should fly like aircraft—stall at high AoA, turn radius should increase with speed, climb rate should decrease with altitude +2. **Enable fast vectorized computation**: All operations should be expressible as numpy/JAX/PyTorch tensor operations +3. **Avoid lookup tables where possible**: Analytical approximations are faster than interpolation +4. 
**Capture the essence of dogfighting**: Energy management, turn performance, climb/dive dynamics + +### Recommended Model Hierarchy + +| Model Type | DOF | Use Case | Typical SPS (single env) | +|------------|-----|----------|--------------------------| +| Full 6-DOF with stability derivatives | 12+ states | Flight sim, detailed control | 1,000-10,000 | +| Point-mass 3-DOF (vertical plane) | 6 states | Trajectory optimization | 50,000-200,000 | +| **Point-mass 3-DOF (3D)** | **6-9 states** | **RL dogfighting (recommended)** | **100,000-500,000** | +| Energy-state approximation | 3-4 states | Strategic AI | 1,000,000+ | + +**Recommendation**: Use a 3-DOF point-mass model with instantaneous bank angle changes for maximum performance while retaining meaningful dogfight dynamics. + +--- + +## 2. Coordinate Systems and Reference Frames + +### Earth-Fixed Frame (Inertial, Flat Earth Approximation) +- Origin at some reference point on ground +- **x**: North (or arbitrary horizontal) +- **y**: East (or perpendicular horizontal) +- **z**: Down (positive toward Earth center) + +For WW2 dogfighting in a local area, flat Earth is entirely acceptable. + +### Body-Fixed Frame +- Origin at aircraft CG +- **x_b**: Forward along fuselage +- **y_b**: Right wing +- **z_b**: Down through belly + +### Wind/Velocity Frame +- **x_w**: Along velocity vector +- **z_w**: Perpendicular, in vertical plane containing velocity + +### Key Angles +``` +α (alpha) = Angle of Attack = angle between x_b and velocity vector (in vertical plane) +β (beta) = Sideslip angle = angle between velocity and x_b-z_b plane +γ (gamma) = Flight path angle = angle between velocity and horizontal +ψ (psi) = Heading angle = horizontal direction of velocity +φ (phi) = Bank/roll angle +θ (theta) = Pitch angle +``` + +For 3-DOF point-mass: we typically track (V, γ, ψ, x, y, h) where bank angle φ is a control input that changes instantaneously. + +--- + +## 3. Equations of Motion + +### 3-DOF Point-Mass Model (Recommended for RL) + +This model treats the aircraft as a point mass with forces applied. It captures the essential performance characteristics without modeling rotational dynamics. 
+ +**State Vector**: `[V, γ, ψ, x, y, h]` or `[V, γ, ψ, x, y, h, m]` if tracking fuel + +**Control Inputs**: `[T (thrust/throttle), n (load factor) or φ (bank), α (angle of attack)]` + +#### Kinematic Equations +```python +dx/dt = V * cos(γ) * cos(ψ) +dy/dt = V * cos(γ) * sin(ψ) +dh/dt = V * sin(γ) # Note: h positive up, so dh/dt = -dz/dt +``` + +#### Dynamic Equations (Forces) + +In the wind-axes frame, summing forces parallel and perpendicular to velocity: + +```python +# Along velocity (tangent to flight path) +m * dV/dt = T * cos(α) - D - W * sin(γ) + +# Perpendicular to velocity, in vertical plane +m * V * dγ/dt = L * cos(φ) + T * sin(α) - W * cos(γ) + +# Perpendicular to velocity, horizontal (turning) +m * V * cos(γ) * dψ/dt = L * sin(φ) +``` + +Where: +- `T` = Thrust +- `D` = Drag +- `L` = Lift +- `W = m * g` = Weight +- `φ` = Bank angle +- `α` = Angle of attack (typically small, so cos(α) ≈ 1, sin(α) ≈ α) + +#### Simplified Form (Small α Approximation) + +For most flight conditions where α < 15°: + +```python +dV/dt = (T - D) / m - g * sin(γ) +dγ/dt = (L * cos(φ) - W * cos(γ)) / (m * V) +dψ/dt = (L * sin(φ)) / (m * V * cos(γ)) +``` + +#### Load Factor Formulation (Often More Convenient) + +Define load factor `n = L / W`: + +```python +dV/dt = (T - D) / m - g * sin(γ) +dγ/dt = (g / V) * (n * cos(φ) - cos(γ)) +dψ/dt = (g * n * sin(φ)) / (V * cos(γ)) +``` + +For a **coordinated turn** (no sideslip), the relationship is: +```python +n = 1 / cos(φ) # for level turn +``` + +So for a 60° bank, n = 2 ("2g turn"). + +--- + +## 4. Aerodynamic Force Models + +### Dynamic Pressure + +The fundamental scaling quantity: +```python +q = 0.5 * ρ * V² +``` +Where `ρ` is air density (kg/m³) and `V` is true airspeed (m/s). + +### Lift Force +```python +L = q * S * C_L +``` +Where: +- `S` = Wing reference area (m²) +- `C_L` = Lift coefficient (dimensionless) + +### Drag Force +```python +D = q * S * C_D +``` +Where `C_D` = Drag coefficient (dimensionless) + +### Converting to Accelerations +```python +# Specific forces (acceleration per unit mass) +L/m = q * S * C_L / m = (q/W) * g * S * C_L = (ρ * V² * S * C_L) / (2 * m) + +# Wing loading W/S is a key aircraft parameter +# Let W/S = wing loading in N/m² or lb/ft² +L/W = (q * C_L) / (W/S) +``` + +--- + +## 5. The Drag Polar + +### The Parabolic Drag Polar (Primary Model) + +The most important equation for aircraft performance: + +```python +C_D = C_D0 + K * C_L² +``` + +Where: +- `C_D0` = Zero-lift drag coefficient (parasite drag) +- `K` = Induced drag factor = 1 / (π * AR * e) +- `AR` = Aspect Ratio = b² / S (wingspan squared over wing area) +- `e` = Oswald efficiency factor (typically 0.7-0.85 for WW2 fighters) + +### Computing K from Aircraft Geometry + +```python +AR = b² / S # Aspect ratio +e = 0.78 # Typical for straight-wing WW2 fighter (Oswald efficiency) +K = 1 / (π * AR * e) +``` + +**Empirical Formulas for Oswald Efficiency**: + +For straight wings (Raymer approximation): +```python +e ≈ 1.78 * (1 - 0.045 * AR^0.68) - 0.64 # Raymer +e ≈ 0.7 + 0.1 * (AR - 6) / 4 # Linear approximation for AR 4-10 +``` + +For WW2 aircraft, using `e = 0.75-0.85` is reasonable. 
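
As a quick sanity check, the induced-drag factor can be computed directly from span, wing area, and an assumed Oswald efficiency. A minimal sketch using the P-51D numbers from the reference data in Section 11 (b, S, and e = 0.80 are the values listed there; the Raymer fit is included only for comparison):

```python
import math

# P-51D geometry from the reference data in Section 11
b = 11.28   # wingspan [m]
S = 21.65   # wing area [m^2]
e = 0.80    # assumed Oswald efficiency

AR = b**2 / S                    # aspect ratio, ~5.86
K = 1.0 / (math.pi * AR * e)     # induced drag factor, ~0.068

# Raymer's straight-wing fit gives a somewhat higher estimate (~0.87)
e_raymer = 1.78 * (1 - 0.045 * AR**0.68) - 0.64

print(f"AR = {AR:.2f}, K = {K:.4f}, e_raymer = {e_raymer:.2f}")
```

These values match the table below to within rounding, which is the level of fidelity this guide targets.
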
+ +### Typical WW2 Fighter Values + +| Parameter | P-51D Mustang | Spitfire Mk IX | Bf 109G | Fw 190A | +|-----------|---------------|----------------|---------|---------| +| C_D0 | 0.0163-0.020 | 0.020-0.021 | 0.024-0.028 | 0.021-0.024 | +| AR | 5.86 | 6.48 | 6.07 | 5.74 | +| e (estimated) | 0.80 | 0.85 | 0.78 | 0.78 | +| K | 0.0686 | 0.058 | 0.069 | 0.071 | + +### Generalized Drag Polar (More Accurate) + +```python +C_D = C_D_min + K * (C_L - C_L_min_drag)² +``` + +Where `C_L_min_drag` is typically 0.1-0.2 for cambered airfoils. For simplicity in RL, the standard parabolic form is usually sufficient. + +### Mach Number Effects (Compressibility) + +For transonic flight (M > 0.6), drag rises significantly. Simple approximation: + +```python +if M < M_crit: + C_D0_M = C_D0 +elif M < 1.0: + # Transonic drag rise + C_D0_M = C_D0 * (1 + 10 * (M - M_crit)²) +else: + # Supersonic (unlikely for WW2) + C_D0_M = C_D0 * (1 + 0.5) + +# M_crit typically 0.7-0.75 for WW2 aircraft +``` + +For WW2 fighters operating below M=0.6 in normal combat, Mach effects can often be ignored. + +--- + +## 6. Lift Coefficient Modeling + +### Linear Region (Pre-Stall) + +In the linear region of the lift curve: + +```python +C_L = C_Lα * (α - α_0) +``` + +Where: +- `C_Lα` = Lift curve slope (per radian) +- `α` = Angle of attack +- `α_0` = Zero-lift angle of attack (negative for cambered airfoils) + +### Lift Curve Slope + +**2D Airfoil (Thin Airfoil Theory)**: +```python +c_lα = 2 * π # per radian ≈ 0.11 per degree +``` + +**3D Finite Wing (Lifting Line Theory)**: +```python +C_Lα = c_lα / (1 + c_lα / (π * AR)) # Approximation for elliptic wing +C_Lα = c_lα * AR / (AR + 2) # Alternative approximation +``` + +For typical AR=6 WW2 fighter: +```python +C_Lα ≈ 2π * 6 / (6 + 2) ≈ 4.71 per radian ≈ 0.082 per degree +``` + +### Stall Modeling + +**Critical**: For dogfighting, stall behavior matters! + +Simple piecewise model: +```python +def C_L(α, C_Lα, α_0, α_stall, C_L_max): + α_effective = α - α_0 + if α < α_stall: + return C_Lα * α_effective + else: + # Post-stall: lift drops off + # Simple linear dropoff + return C_L_max - 0.5 * (α - α_stall) * C_Lα +``` + +**Smooth stall model** (better for gradient-based methods): +```python +def C_L_smooth(α, C_Lα, α_0, α_stall, C_L_max): + """Sigmoid-smoothed stall transition""" + α_eff = α - α_0 + C_L_linear = C_Lα * α_eff + + # Smooth saturation using tanh + k = 10 # Sharpness of stall transition + C_L = C_L_max * np.tanh(C_L_linear / C_L_max) + + return C_L +``` + +**Typical values for WW2 fighters**: +- `α_stall` ≈ 14-18° (clean configuration) +- `C_L_max` ≈ 1.3-1.6 (clean), up to 2.0+ with flaps + +### Relating Lift Coefficient to Load Factor + +In a maneuver: +```python +C_L = (n * W) / (q * S) = (2 * n * W) / (ρ * V² * S) +``` + +So for a given load factor and speed, you can find the required C_L, and from that, the required α. + +--- + +## 7. Propulsion: Piston Engine and Propeller Models + +### Engine Power vs. Altitude + +Piston engines lose power with altitude due to decreasing air density. For naturally aspirated engines: + +```python +P(h) = P_SL * σ # Where σ = ρ/ρ_SL (density ratio) +``` + +For supercharged engines (most WW2 fighters): +```python +if h < h_critical: + P(h) = P_rated # Full power up to critical altitude +else: + P(h) = P_rated * (ρ(h) / ρ(h_critical)) +``` + +**Critical altitude** is where the supercharger can no longer maintain sea-level manifold pressure. 
Typical values: +- Single-stage supercharger: 15,000-20,000 ft +- Two-stage supercharger: 25,000-30,000 ft (stepped) + +### Propeller Model + +For prop aircraft, thrust depends on power and propeller efficiency: + +```python +T = η_p * P / V +``` + +Where `η_p` = propeller efficiency (typically 0.7-0.85 in cruise). + +**Problem**: At V=0, this gives infinite thrust! + +### Propeller Efficiency Model + +Simple model for variable-pitch propeller: +```python +def prop_efficiency(V, P, D_prop, rho): + """ + V: airspeed (m/s) + P: shaft power (W) + D_prop: propeller diameter (m) + rho: air density (kg/m³) + """ + # Advance ratio proxy + if V < 1: + V = 1 # Avoid division by zero + + # Maximum theoretical efficiency from momentum theory + T_ideal = P / V + disk_area = π * (D_prop/2)² + v_induced = np.sqrt(T_ideal / (2 * rho * disk_area)) + η_ideal = 1 / (1 + v_induced / V) + + # Practical efficiency (80-90% of ideal) + η_p = 0.85 * η_ideal + + # Clamp to reasonable range + η_p = np.clip(η_p, 0, 0.88) + + return η_p +``` + +### Simplified Thrust Model (Recommended for RL) + +Rather than modeling η_p complexly, use an empirical fit: + +```python +def thrust_model(V, P_max, V_max, rho, rho_SL): + """ + Simple thrust model for WW2 prop aircraft + + At low speed: T approaches static thrust + At high speed: T = η * P / V with η ≈ 0.8 + """ + # Power available (with altitude correction) + P_avail = P_max * (rho / rho_SL) # Simplified; use supercharger model for better accuracy + + # Static thrust approximation (from momentum theory) + # T_static ≈ (P² * rho * disk_area * 2)^(1/3) + D_prop = 3.0 # meters, typical WW2 fighter + disk_area = π * (D_prop/2)**2 + T_static = (P_avail**2 * 2 * rho * disk_area)**(1/3) + + # High-speed thrust + η_cruise = 0.80 + T_cruise = η_cruise * P_avail / max(V, 1) + + # Blend between static and cruise + # Smooth transition around V_transition + V_transition = 50 # m/s + blend = np.tanh(V / V_transition) + + T = T_static * (1 - blend) + T_cruise * blend + + return T +``` + +### Even Simpler: Polynomial Thrust Model + +For maximum speed, fit thrust vs velocity from known aircraft data: + +```python +def thrust_polynomial(V, T_static, T_max_speed, V_max): + """ + T(V) = T_static - k*V² approximately, where aircraft reaches V_max when T = D + """ + k = (T_static - T_max_speed) / V_max**2 + T = T_static - k * V**2 + return max(T, 0) +``` + +--- + +## 8. 
Atmospheric Model + +### International Standard Atmosphere (ISA) + +For troposphere (h < 11,000 m / 36,089 ft): + +```python +# Constants +T_SL = 288.15 # K (15°C) +P_SL = 101325 # Pa +ρ_SL = 1.225 # kg/m³ +g = 9.80665 # m/s² +R = 287.05 # J/(kg·K), specific gas constant for air +γ_air = 1.4 # Ratio of specific heats +λ = 0.0065 # Temperature lapse rate, K/m + +def atmosphere_troposphere(h): + """ + ISA atmospheric properties for h in meters (h < 11000 m) + """ + T = T_SL - λ * h + P = P_SL * (T / T_SL) ** (g / (R * λ)) + ρ = ρ_SL * (T / T_SL) ** (g / (R * λ) - 1) + a = np.sqrt(γ_air * R * T) # Speed of sound + + return T, P, ρ, a +``` + +**Numerical values**: +```python +# Exponents +g / (R * λ) = 9.80665 / (287.05 * 0.0065) ≈ 5.256 +g / (R * λ) - 1 ≈ 4.256 +``` + +So: +```python +T = 288.15 - 0.0065 * h +P = 101325 * (T / 288.15) ** 5.256 +ρ = 1.225 * (T / 288.15) ** 4.256 +``` + +### Density Ratio (Most Important for Performance) + +```python +σ = ρ / ρ_SL = (T / T_SL) ** 4.256 = (1 - h/44330) ** 4.256 +``` + +Quick approximation: +```python +σ ≈ np.exp(-h / 9000) # Rough exponential fit, h in meters +``` + +### Altitude in Feet (Common in Aviation) + +```python +def atmosphere_ISA_feet(h_ft): + """h_ft in feet""" + h_m = h_ft * 0.3048 + T = 288.15 - 0.0065 * h_m + σ = (T / 288.15) ** 4.256 + ρ = 1.225 * σ + a = np.sqrt(1.4 * 287.05 * T) + return T, ρ, a, σ +``` + +--- + +## 9. Performance Calculations + +### Maximum Level Flight Speed + +At maximum speed, Thrust = Drag: +```python +T_max = D = q * S * C_D = 0.5 * ρ * V_max² * S * C_D0 # At high speed, induced drag small +``` + +Solving: +```python +V_max ≈ sqrt(2 * T_max / (ρ * S * C_D0)) +``` + +### Stall Speed + +At stall, L = W at C_L_max: +```python +V_stall = sqrt(2 * W / (ρ * S * C_L_max)) +``` + +Or in terms of wing loading: +```python +V_stall = sqrt(2 * (W/S) / (ρ * C_L_max)) +``` + +### Best Climb Speed and Rate + +**Maximum rate of climb** occurs at the speed where excess power is maximum: +```python +P_excess = P_avail - P_required +P_required = D * V = 0.5 * ρ * V³ * S * C_D +``` + +For prop aircraft, best climb occurs roughly at: +```python +V_best_climb ≈ sqrt(2 * (W/S) / (ρ * sqrt(3 * C_D0 / K))) # Minimum power speed +``` + +**Rate of climb**: +```python +RC = P_excess / W = (P_avail - D*V) / W +``` + +Or in terms of specific excess power: +```python +P_s = (T - D) * V / W = V * (T/W - D/W) +RC = P_s # for small climb angles +``` + +### Turn Performance + +**Turn radius**: +```python +R = V² / (g * sqrt(n² - 1)) +``` + +For n >> 1: +```python +R ≈ V² / (g * n) +``` + +**Turn rate** (angular velocity): +```python +ω = V / R = g * sqrt(n² - 1) / V +``` + +**Maximum instantaneous turn rate** (limited by C_L_max): +```python +n_max_aero = q * S * C_L_max / W +ω_max = g * sqrt(n_max_aero² - 1) / V +``` + +**Maximum sustained turn rate** (limited by thrust = drag): +Must have T = D at the required C_L: +```python +# At sustained turn, T = D = q * S * (C_D0 + K * C_L²) +# And L = n * W = q * S * C_L +# Solve for n_sustained given T_avail +``` + +### Energy Management + +**Specific energy** (energy height): +```python +E_s = h + V² / (2 * g) +``` + +**Specific excess power**: +```python +P_s = dE_s/dt = (T - D) * V / W +``` + +This is THE key parameter for dogfighting—aircraft with higher P_s at a given flight condition will "win" the energy game. + +--- + +## 10. 
Implementation Strategies for High SPS + +### Vectorization is Everything + +Write all computations to operate on batched tensors: + +```python +import numpy as np + +def step_vectorized(state, action, aircraft_params): + """ + state: (N, 6) array of [V, γ, ψ, x, y, h] for N environments + action: (N, 3) array of [throttle, bank_cmd, pitch_cmd] + """ + V, γ, ψ, x, y, h = state.T + throttle, φ_cmd, α_cmd = action.T + + # Atmospheric properties (vectorized) + T_atm = 288.15 - 0.0065 * h + σ = (T_atm / 288.15) ** 4.256 + ρ = 1.225 * σ + + # Dynamic pressure + q = 0.5 * ρ * V**2 + + # Aerodynamic coefficients + C_L = compute_CL_vectorized(α_cmd, aircraft_params) + C_D = aircraft_params['CD0'] + aircraft_params['K'] * C_L**2 + + # Forces + L = q * aircraft_params['S'] * C_L + D = q * aircraft_params['S'] * C_D + T = compute_thrust_vectorized(V, throttle, σ, aircraft_params) + W = aircraft_params['mass'] * 9.81 + + # Equations of motion + n = L / W + dV_dt = (T - D) / aircraft_params['mass'] - 9.81 * np.sin(γ) + dγ_dt = (9.81 / V) * (n * np.cos(φ_cmd) - np.cos(γ)) + dψ_dt = (9.81 * n * np.sin(φ_cmd)) / (V * np.cos(γ) + 1e-6) + + # Kinematics + dx_dt = V * np.cos(γ) * np.cos(ψ) + dy_dt = V * np.cos(γ) * np.sin(ψ) + dh_dt = V * np.sin(γ) + + # Euler integration + dt = 0.02 # 50 Hz + new_state = state + dt * np.stack([dV_dt, dγ_dt, dψ_dt, dx_dt, dy_dt, dh_dt], axis=1) + + return new_state +``` + +### Avoid These Performance Killers + +1. **Python loops over environments** - Always use vectorized operations +2. **Conditionals on per-environment basis** - Use `np.where` or `np.clip` instead +3. **Complex lookup tables** - Replace with polynomial/analytical approximations +4. **Trigonometric functions** - Cache sin/cos when possible; consider small-angle approximations +5. **Division by small numbers** - Add epsilon to avoid NaN/Inf + +### JAX Implementation for GPU + +```python +import jax +import jax.numpy as jnp +from functools import partial + +@partial(jax.jit, static_argnums=(2,)) +def step_jax(state, action, aircraft_params): + """JIT-compiled step function for maximum GPU performance""" + V, γ, ψ, x, y, h = state[..., 0], state[..., 1], state[..., 2], state[..., 3], state[..., 4], state[..., 5] + + # ... same logic as numpy version ... + + return new_state + +# Vectorize over batch dimension +step_batched = jax.vmap(step_jax, in_axes=(0, 0, None)) +``` + +### Numerical Integration + +For high SPS, use simple Euler integration with small timestep: + +```python +dt = 0.02 # 50 Hz (20ms timestep) +state_new = state + dt * state_derivative +``` + +For better accuracy without much overhead, use semi-implicit Euler: +```python +# Update velocities first +V_new = V + dt * dV_dt +# Use new velocity for positions +x_new = x + dt * V_new * cos(γ) * cos(ψ) +``` + +RK4 is typically overkill for RL training and adds 4x computational cost. + +### State Normalization + +Normalize states for neural network input: +```python +state_normalized = (state - state_mean) / state_std + +# Typical normalization values for WW2 dogfight: +# V: mean=150 m/s, std=50 m/s +# γ: mean=0, std=0.5 rad +# ψ: mean=π, std=π (or use sin/cos representation) +# x, y: mean=0, std=5000 m +# h: mean=3000 m, std=2000 m +``` + +--- + +## 11. 
WW2 Aircraft Reference Data + +### P-51D Mustang + +```python +P51D = { + 'name': 'P-51D Mustang', + 'mass': 4175, # kg (loaded) + 'S': 21.65, # m² wing area + 'b': 11.28, # m wingspan + 'AR': 5.86, # aspect ratio + 'CD0': 0.0170, # zero-lift drag + 'e': 0.80, # Oswald efficiency + 'K': 0.068, # induced drag factor + 'CL_max': 1.49, # max lift coefficient (clean) + 'CL_alpha': 4.7, # per radian + 'alpha_stall': 16, # degrees + 'P_max': 1230000, # W (1650 hp at WEP) + 'h_critical': 7620, # m (25,000 ft) with 2-stage supercharger + 'V_max': 180, # m/s (703 km/h at altitude) + 'V_stall': 46, # m/s (100 mph clean) + 'RC_max': 17.5, # m/s (3450 ft/min) + 'service_ceiling': 12770, # m (41,900 ft) + 'n_limit': 8.0, # structural g limit +} +``` + +### Supermarine Spitfire Mk IX + +```python +SpitfireIX = { + 'name': 'Spitfire Mk IX', + 'mass': 3400, # kg (loaded) + 'S': 22.48, # m² wing area + 'b': 11.23, # m wingspan (clipped) + 'AR': 5.61, # aspect ratio + 'CD0': 0.0210, # zero-lift drag + 'e': 0.85, # Oswald efficiency (elliptical wing) + 'K': 0.067, # induced drag factor + 'CL_max': 1.36, # max lift coefficient + 'CL_alpha': 4.5, # per radian + 'alpha_stall': 15, # degrees + 'P_max': 1100000, # W (1475 hp) + 'h_critical': 6100, # m (20,000 ft) + 'V_max': 182, # m/s (657 km/h) + 'V_stall': 42, # m/s (82 kt) + 'RC_max': 21, # m/s (4100 ft/min) + 'service_ceiling': 13100, # m (43,000 ft) + 'n_limit': 8.0, +} +``` + +### Messerschmitt Bf 109G-6 + +```python +Bf109G = { + 'name': 'Bf 109G-6', + 'mass': 3100, # kg (loaded) + 'S': 16.05, # m² wing area + 'b': 9.92, # m wingspan + 'AR': 6.13, # aspect ratio + 'CD0': 0.0260, # zero-lift drag (higher due to design) + 'e': 0.78, # Oswald efficiency + 'K': 0.066, # induced drag factor + 'CL_max': 1.52, # max lift coefficient + 'CL_alpha': 4.8, # per radian + 'alpha_stall': 17, # degrees + 'P_max': 1050000, # W (1410 hp) + 'h_critical': 5700, # m (18,700 ft) + 'V_max': 170, # m/s (621 km/h) + 'V_stall': 50, # m/s (97 kt) + 'RC_max': 19, # m/s (3750 ft/min) + 'service_ceiling': 11550, # m (37,900 ft) + 'n_limit': 7.5, +} +``` + +### Focke-Wulf Fw 190A-8 + +```python +Fw190A = { + 'name': 'Fw 190A-8', + 'mass': 4400, # kg (loaded) + 'S': 18.30, # m² wing area + 'b': 10.51, # m wingspan + 'AR': 6.04, # aspect ratio + 'CD0': 0.0220, # zero-lift drag + 'e': 0.78, # Oswald efficiency + 'K': 0.068, # induced drag factor + 'CL_max': 1.45, # max lift coefficient + 'CL_alpha': 4.6, # per radian + 'alpha_stall': 16, # degrees + 'P_max': 1270000, # W (1700 hp with MW 50) + 'h_critical': 6300, # m (20,700 ft) + 'V_max': 171, # m/s (615 km/h) + 'V_stall': 55, # m/s (107 kt) + 'RC_max': 15, # m/s (2950 ft/min) + 'service_ceiling': 10300, # m (33,800 ft) + 'n_limit': 8.5, +} +``` + +### Mitsubishi A6M5 Zero + +```python +A6M5 = { + 'name': 'A6M5 Zero', + 'mass': 2750, # kg (loaded) + 'S': 22.44, # m² wing area + 'b': 11.0, # m wingspan + 'AR': 5.39, # aspect ratio + 'CD0': 0.0230, # zero-lift drag + 'e': 0.80, # Oswald efficiency + 'K': 0.074, # induced drag factor + 'CL_max': 1.40, # max lift coefficient + 'CL_alpha': 4.3, # per radian + 'alpha_stall': 15, # degrees + 'P_max': 840000, # W (1130 hp) + 'h_critical': 4500, # m (14,800 ft) + 'V_max': 156, # m/s (565 km/h) + 'V_stall': 40, # m/s (78 kt) + 'RC_max': 16, # m/s (3150 ft/min) + 'service_ceiling': 11740, # m (38,520 ft) + 'n_limit': 7.0, # Structural limit (lighter construction) +} +``` + +--- + +## 12. 
Validation and Sanity Checks + +### Must-Pass Tests + +Before deploying the environment, verify these behaviors: + +1. **Level flight equilibrium**: At trim conditions, aircraft should maintain altitude + ```python + assert abs(dh_dt) < 0.1 # m/s at trim + ``` + +2. **Stall speed matches data**: + ```python + V_stall_computed = np.sqrt(2 * W / (ρ_SL * S * CL_max)) + assert abs(V_stall_computed - V_stall_data) / V_stall_data < 0.05 + ``` + +3. **Max speed matches data** (approximately): + ```python + # At altitude where V_max occurs + V_max_computed = compute_max_speed(h_optimal) + assert abs(V_max_computed - V_max_data) / V_max_data < 0.10 + ``` + +4. **Turn physics are correct**: + ```python + # 60° bank should give 2g and specific turn radius + n = 1 / np.cos(np.radians(60)) + assert abs(n - 2.0) < 0.01 + R = V**2 / (g * np.sqrt(n**2 - 1)) + # At V=100 m/s, R ≈ 588 m + ``` + +5. **Energy conservation** (with zero thrust/drag): + ```python + E_s = h + V**2 / (2*g) + # With T=D=0, dE_s/dt should be zero + ``` + +6. **Climb rate matches data**: + ```python + RC_computed = compute_max_RC(h=0) + assert abs(RC_computed - RC_max_data) / RC_max_data < 0.15 + ``` + +### Behavioral Checks for Dogfighting + +1. **Energy advantage matters**: Aircraft starting with more altitude/speed should have an advantage +2. **Turn fights favor appropriate aircraft**: Low wing-loading aircraft should out-turn heavy ones +3. **Boom-and-zoom works**: Fast diving attacks followed by climb-away should be viable +4. **Stall is dangerous**: Aircraft that stall should lose controllability temporarily +5. **Altitude matters**: Engine performance should degrade at high altitude + +--- + +## 13. Sources and References + +### Primary Textbooks + +1. **Anderson, J.D.** - "Introduction to Flight" (McGraw-Hill) + - Chapters on aircraft performance, drag polar, climb/turn performance + - Standard reference for undergraduate aerodynamics + +2. **Stevens, B.L. & Lewis, F.L.** - "Aircraft Control and Simulation" (Wiley, 3rd Ed. 2015) + - Gold standard for flight dynamics modeling + - F-16 model used in many research implementations + - GitHub implementations: [isrlab/F16-Model-Matlab](https://github.com/isrlab/F16-Model-Matlab) + +3. **Raymer, D.P.** - "Aircraft Design: A Conceptual Approach" (AIAA) + - Empirical formulas for Oswald efficiency, drag estimation + - Excellent for quick approximations + +4. **Roskam, J.** - "Methods for Estimating Drag Polars of Subsonic Airplanes" + - Detailed component buildup methods + +5. **Stengel, R.E.** - "Flight Dynamics" (Princeton University Press) + - Excellent lecture notes available at: https://stengel.mycpanel.princeton.edu/ + - MATLAB code for 6-DOF simulation: [FLIGHTv2](https://stengel.mycpanel.princeton.edu/FDcodeB.html) + +### Open Source Flight Dynamics Models + +1. **JSBSim** - https://github.com/JSBSim-Team/jsbsim + - Industry-standard open-source FDM + - Used in FlightGear, DARPA ACE program + - XML-based aircraft configuration files + - Has WW2 aircraft models (P-51, etc.) + +2. **AeroBenchVV** - https://github.com/pheidlauf/AeroBenchVV + - F-16 model for verification and validation + - MATLAB implementation of Stevens & Lewis model + +3. **F16 Flight Dynamics (Python/C++)** - https://github.com/EthanJamesLew/f16-flight-dynamics + - Efficient C++ implementation with Python bindings + - Good reference for high-performance implementation + +### RL Environment Implementations + +1. 
**LAG (Light Aircraft Game)** - https://github.com/liuqh16/LAG + - JSBSim-based air combat environment + - PPO/MAPPO implementations included + - Good reference for reward shaping in dogfights + +2. **BVR Gym** - https://arxiv.org/html/2403.17533 + - Beyond-visual-range air combat environment + - Built on JSBSim with OpenAI Gym interface + +3. **Tunnel** - https://arxiv.org/html/2505.01953v1 + - Lightweight F-16 RL environment + - Focus on simplicity and accessibility + +4. **DBRL** - https://github.com/mrwangyou/DBRL + - Dogfighting benchmark for RL research + +### High-Performance RL Infrastructure + +1. **EnvPool** - https://github.com/sail-sg/envpool + - C++-based parallel environment execution + - 1M+ steps/second demonstrated + - Good patterns for vectorized environments + +2. **Isaac Gym** - https://github.com/isaac-sim/IsaacGymEnvs + - GPU-accelerated physics simulation + - Demonstrates full GPU pipeline for RL + +3. **PufferLib** - https://github.com/PufferAI/PufferLib + - Clean, fast RL training framework + - Good target platform for implementation + +### WW2 Aircraft Data + +1. **WWII Aircraft Performance** - https://www.wwiiaircraftperformance.org/ + - Comprehensive primary source documents + - Flight test data, performance charts + +2. **CFD Evaluation of WW2 Fighters** - Lednicer (1995) + - ResearchGate: "A CFD evaluation of three prominent World War II fighter aircraft" + - Spitfire, P-51, Fw 190 aerodynamic comparison + +3. **Aerodynamics of the Spitfire** - Royal Aeronautical Society + - Detailed analysis of Spitfire aerodynamics + - CD0 ≈ 0.020-0.021 for Spitfire + +### Atmospheric Models + +1. **International Standard Atmosphere** - ISO 2533:1975 + - Official ISA specification + - Wikipedia summary is accurate and sufficient + +2. 
**ICAO Standard Atmosphere** - ICAO Doc 7488-CD + - Extended to 80 km altitude + +--- + +## Quick Reference Card + +### Core Equations (Copy-Paste Ready) + +```python +# === ATMOSPHERE (ISA, troposphere) === +T = 288.15 - 0.0065 * h # Temperature [K], h in meters +σ = (T / 288.15) ** 4.256 # Density ratio +ρ = 1.225 * σ # Density [kg/m³] +a = 20.05 * np.sqrt(T) # Speed of sound [m/s] + +# === AERODYNAMICS === +q = 0.5 * ρ * V**2 # Dynamic pressure [Pa] +C_L = C_Lα * α # Lift coefficient (linear region) +C_D = C_D0 + K * C_L**2 # Drag polar +L = q * S * C_L # Lift [N] +D = q * S * C_D # Drag [N] + +# === PROPULSION (simple) === +P = P_max * σ * throttle # Power available [W] +T = η_p * P / V # Thrust [N], η_p ≈ 0.80 + +# === PERFORMANCE === +V_stall = np.sqrt(2 * W / (ρ * S * C_L_max)) # Stall speed +R_turn = V**2 / (g * np.sqrt(n**2 - 1)) # Turn radius +ω_turn = g * np.sqrt(n**2 - 1) / V # Turn rate [rad/s] +RC = (P * η_p - D * V) / W # Rate of climb [m/s] + +# === EQUATIONS OF MOTION (3DOF point mass) === +dV_dt = (T - D) / m - g * np.sin(γ) +dγ_dt = (g / V) * (n * np.cos(φ) - np.cos(γ)) +dψ_dt = g * n * np.sin(φ) / (V * np.cos(γ)) +dx_dt = V * np.cos(γ) * np.cos(ψ) +dy_dt = V * np.cos(γ) * np.sin(ψ) +dh_dt = V * np.sin(γ) +``` + +### Typical Values to Remember + +| Parameter | Typical Value | Notes | +|-----------|--------------|-------| +| C_D0 | 0.017-0.028 | Lower = cleaner aircraft | +| e (Oswald) | 0.75-0.85 | Elliptic wing ≈ 0.85 | +| K | 0.06-0.08 | K = 1/(π·AR·e) | +| C_L_max | 1.3-1.6 | Clean configuration | +| C_Lα | 4.5-5.0 /rad | 3D wing | +| α_stall | 14-18° | Depends on airfoil | +| η_propeller | 0.75-0.85 | Cruise conditions | +| σ at 20,000 ft | 0.53 | Density ratio | +| σ at 30,000 ft | 0.37 | Density ratio | + +--- + +*Document prepared for Claude agents developing WW2 dogfighting RL environments. Focus on computational efficiency while maintaining physical plausibility.* diff --git a/pufferlib/ocean/dogfight/AUTOPILOT_TODO.md b/pufferlib/ocean/dogfight/AUTOPILOT_TODO.md new file mode 100644 index 000000000..a5b0c1d7d --- /dev/null +++ b/pufferlib/ocean/dogfight/AUTOPILOT_TODO.md @@ -0,0 +1,172 @@ +# Autopilot System TODO + +Technical debt and future improvements for the target aircraft autopilot system. + +--- + +## Critical Issues + +### 1. Python/C Enum Sync Problem +**Risk: Silent breakage if enums diverge** + +```c +// autopilot.h +typedef enum { AP_STRAIGHT = 0, AP_LEVEL, ... } AutopilotMode; +``` +```python +# dogfight.py +class AutopilotMode: + STRAIGHT = 0 # Must manually match C +``` + +**Fix options:** +- [ ] Generate Python constants from C header at build time +- [ ] Add runtime validation that checks enum values match +- [ ] Add static_assert in C for enum count + +### 2. ~~No Vectorized set_autopilot~~ DONE (80bcf31e) +```python +def set_autopilot(self, env_idx=0, ...): # Must call N times for N envs +``` + +**Fix:** +- [x] Accept `env_idx=None` to mean "all environments" +- [x] Add C binding `vec_set_autopilot()` for efficiency + +### 3. force_state() Doesn't Reset PID State +When teleporting plane via `force_state()`, autopilot PID state (`prev_vz`, `prev_bank_error`) retains stale values causing derivative spikes. + +**Fix:** +- [ ] Reset autopilot PID state in `force_state()` C function +- [ ] Or add `reset_pid` parameter to force_state + +### 4. No Mode Bounds Check +Invalid mode values (e.g., `mode=99`) silently become AP_STRAIGHT. 
+ +**Fix:** +- [ ] Add bounds check in `autopilot_set_mode()` +- [ ] Return error or clamp to valid range + +--- + +## Curriculum Learning Gaps + +### ~~Mode Weights for Non-Uniform Selection~~ DONE (0a1c2e6d) +```python +env.set_mode_weights(level=0.5, turn_left=0.25, turn_right=0.25, climb=0.0, descend=0.0) +``` + +**Tasks:** +- [x] Add `mode_weights` array to AutopilotState +- [x] Implement weighted random selection in `autopilot_randomize()` +- [x] Add Python API: `set_mode_weights()` +- [x] Default weights = uniform +- [x] Add `get_autopilot_mode()` for testing/debugging +- [x] Add unit test in test_flight.py + +### Per-Episode Parameter Variance +Bank angle and climb rate are fixed at `set_autopilot()` time. + +**Need:** +- [ ] `bank_deg_min`, `bank_deg_max` fields - randomize within range each reset +- [ ] `climb_rate_min`, `climb_rate_max` fields +- [ ] `throttle_min`, `throttle_max` fields + +### Difficulty Abstraction +Single 0.0-1.0 difficulty scale that controls multiple parameters. + +```python +# Desired API: +env.set_difficulty(0.3) # Maps to mode weights, bank angles, etc. +``` + +**Tasks:** +- [ ] Design difficulty mapping (what parameters at what difficulty) +- [ ] Implement `set_difficulty()` in Python +- [ ] Document difficulty levels + +### Per-Environment Difficulty +In vectorized envs, all opponents currently share settings. + +**Need:** +- [ ] Allow different autopilot settings per env index +- [ ] Or difficulty gradient across env indices + +--- + +## Test Integration Gaps + +### Player Autopilot for test_flight.py +Currently autopilot only controls opponent. test_flight.py tests player with Python PID. + +**Tasks:** +- [ ] Add `AutopilotState player_ap` to Dogfight struct +- [ ] Add `player_autopilot_enabled` flag +- [ ] Add Python API `set_player_autopilot()` +- [ ] Migrate test_flight.py PID tests to use C autopilot + +### Query Autopilot State +~~No way to verify autopilot mode from Python.~~ Partial (0a1c2e6d) + +**Tasks:** +- [x] Add `get_autopilot_mode()` C binding +- [ ] Return current mode, bank, climb_rate, etc. (only mode implemented) +- [x] Add to Python wrapper + +--- + +## Missing Maneuvers + +### Basic Extensions +- [ ] `AP_WEAVE` - S-turns with configurable period +- [ ] `AP_CLIMBING_TURN` - Combined climb + bank +- [ ] `AP_DESCENDING_TURN` - Combined descent + bank + +### Evasive Maneuvers (Hard difficulty) +- [ ] `AP_JINK` - Random direction changes at intervals +- [ ] `AP_BREAK` - Hard turn away from threat +- [ ] `AP_BARREL_ROLL` - Defensive roll + +### Pursuit Behaviors +- [ ] `AP_PURSUIT` - Turn toward player +- [ ] `AP_LEAD_PURSUIT` - Aim ahead of player +- [ ] `AP_LAG_PURSUIT` - Trail behind player + +### Opponent Combat +- [ ] Enable opponent firing (`actions[4]` currently hardcoded to -1) +- [ ] Accuracy scaling based on difficulty +- [ ] Reaction time/delay modeling + +--- + +## Code Quality + +### Explicit Random Mode List +Current implicit range assumption is fragile: +```c +int mode = 1 + (rand() % (AP_COUNT - 2)); // Assumes modes 1..5 are valid +``` + +**Fix:** +- [ ] Replace with explicit array of randomizable modes +```c +static const AutopilotMode RANDOM_MODES[] = {AP_LEVEL, AP_TURN_LEFT, ...}; +``` + +### PID Derivative Smoothing +First step after reset may have derivative spike. + +**Fix:** +- [ ] Initialize `prev_vz` to current `vz` on first step +- [ ] Or use filtered derivative + +--- + +## Priority Order + +1. ~~**High:** Vectorized set_autopilot~~ DONE (80bcf31e) +2. 
~~**High:** Mode weights (core curriculum feature)~~ DONE (0a1c2e6d) +3. **Medium:** Per-episode parameter variance +4. **Medium:** Player autopilot for tests +5. **Low:** Additional maneuvers +6. **Low:** Opponent combat diff --git a/pufferlib/ocean/dogfight/BISECTION.md b/pufferlib/ocean/dogfight/BISECTION.md new file mode 100644 index 000000000..dd0fce9dd --- /dev/null +++ b/pufferlib/ocean/dogfight/BISECTION.md @@ -0,0 +1,287 @@ +# Training Regression Bisection + +## Problem + +Agent used to train to ~1.0 kills easily. After adding features (curriculum, stages), even the simplest scenario (TAIL_CHASE) only achieves ~0.5 kills. + +**Goal**: Find the commit where training regressed. + +--- + +## Instructions + +### Git rules: + +- **OK**: `git stash`, `git checkout`, `git diff`, `git log` +- **NOT OK**: `git add`, `git commit`, `git push` - DO NOT modify git history + +### For each commit you test: + +```bash +# 1. Stash any local changes if needed +git stash + +# 2. Checkout the commit +git checkout HASH + +# 3. Build +python setup.py build_ext --inplace --force + +# 4. Run training 3 times, each to a separate log +python -m pufferlib.pufferl train puffer_dogfight 2>&1 | tee pufferlib/ocean/dogfight/baselines/bisect_HASH_r1.log +python -m pufferlib.pufferl train puffer_dogfight 2>&1 | tee pufferlib/ocean/dogfight/baselines/bisect_HASH_r2.log +python -m pufferlib.pufferl train puffer_dogfight 2>&1 | tee pufferlib/ocean/dogfight/baselines/bisect_HASH_r3.log + +# 5. Return to working branch and restore stash +git checkout dogfight +git stash pop # if you stashed earlier +``` + +### How to fill in the table: + +1. **Kills**: Average the final `kills` metric from all 3 runs +2. **Return**: Average the final `episode_return` from all 3 runs +3. **Notes**: + - If one run is a major outlier (e.g., 0.3, 0.9, 0.35), note it: "outlier: 0.9" + - Note anything unusual: "no curriculum in this version", "different reward scale", etc. + +### Choosing which commit to test next: + +**DO NOT blindly bisect.** Use the commit messages to make informed choices. + +1. Start with **a4172661** (baseline) to confirm training once worked +2. Use binary search as a GUIDE, but prioritize commits with meaningful names: + - "Fix Reward and Score" - likely affects kills metric + - "Simplify Penalties and Rewards" - definitely relevant + - "Rewards Fixed - Sweepable" - relevant + - "Debug Prints" - probably NOT relevant, skip unless adjacent to regression +3. If bisect lands you on "Debug Prints" but "Fix Reward and Score" is one commit away, test the meaningful one instead +4. The goal is to UNDERSTAND what broke, not just find a hash + +**Think like a debugger**: What change COULD have broken training? Test those commits first. + +--- + +## Results + +Kills = 3-run average. Return = episode return average. Perf = older metric (same as kills in later commits). 
+ +| Hash | Message | Kills | Perf | Return | Notes | +|------|---------|-------|------|--------|-------| +| 4e640ee0 | Trying to Fix Curriculum - Agent Trains Poorly | ~0.5 | | | Current state, KNOWN BAD | +| f6c821d7 | Still Trying to Fix Blowups | | | | | +| 2c3073f5 | Debug Prints | | | | | +| b68d1b22 | Simplify Penalties and Rewards | | | | 6 obs schemes, default=0 | +| b68_obs0 | ^ scheme 0 (size 12) | 0.35 | 0.35 | -9 | BAD, eps_per_stage=120 | +| b68_obs1 | ^ scheme 1 (size 17) | | | | | +| b68_obs2 | ^ scheme 2 (size 10) | | | | | +| b68_obs3 | ^ scheme 3 (size 10) | | | | | +| b68_obs4 | ^ scheme 4 (size 13) | | | | | +| b68_obs5 | ^ scheme 5 (size 15) | | | | | +| 9dca5c67 | Reduce Prints | | | | | +| ab222bfc | Increase Batch Size for Speed | | | | eps_per_stage=15000 | +| ab2_obs5 | ^ scheme 5 (size 15) | 1.0 | 1.0 | -0.8 | **GOOD** last good commit | +| 7fd88f1c | Next Sweep Improvements - Likes to Aileron Roll too Much | | | | 6 obs, eps_per_stage=60 | +| 7fd_obs5 | ^ scheme 5 (size 15) | 0.02 | 0.02 | -82 | **TERRIBLE** eps_per_stage=60 | +| 7fd_obs0 | ^ scheme 0, eps=100k | 0.62 | 0.62 | -23 | All 3 runs consistent | +| 7fd_obs1 | ^ scheme 1, eps=100k | 0.71 | 0.71 | -18 | outlier r3=0.84 | +| 7fd_obs2 | ^ scheme 2, eps=100k | 0.81 | 0.81 | -26 | outlier r2=0.99! | +| 7fd_obs3 | ^ scheme 3, eps=100k | 0.55 | 0.55 | -45 | WORST, outlier r3=0.30 | +| 7fd_obs4 | ^ scheme 4, eps=100k | 0.62 | 0.62 | -35 | All 3 runs consistent | +| 7fd_obs5 | ^ scheme 5, eps=100k | 0.78 | 0.78 | -14 | outlier r3=0.96 | +| 30fa9fed | Fix Obs 5 Schema and Adjust Penalties | 0.94 | 0.94 | -34 | GOOD, outlier r3=0.81 | +| 652ab7a6 | Fix Elevator Problems | | | | | +| fe7e26a2 | Roll Penalty - Elevator Might Be Inversed | | | | | +| bc728368 | New Obs Schemas - New Sweep Prep | | | | 6 obs schemes (0-5) | +| bc7_obs0 | ^ scheme 0 (size 12) | | | | | +| bc7_obs1 | ^ scheme 1 (size 17) | | | | | +| bc7_obs2 | ^ scheme 2 (size 10) | | | | | +| bc7_obs3 | ^ scheme 3 (size 10) | | | | | +| bc7_obs4 | ^ scheme 4 (size 13) | | | | | +| bc7_obs5 | ^ scheme 5 (size 15) | 1.0 | 1.0 | 0.1 | **GOOD** ini default, very short eps | +| 2606e20e | Apply Sweep df1 84 u5i33hej | | | | 6 obs schemes (0-5) | +| 260_obs0 | ^ scheme 0 (size 19) | | | | | +| 260_obs1 | ^ scheme 1 (size 21) | | | | | +| 260_obs2 | ^ scheme 2 (size 12) | | | | | +| 260_obs3 | ^ scheme 3 (size 17) | | | | | +| 260_obs4 | ^ scheme 4 (size 10) | | | | | +| 260_obs5 | ^ scheme 5 (size 43) | | | | | +| 17f18c19 | Fix Reward and Score | | | | kills tracking added here | +| 3cc5b588 | More Sweep Prep | | | | | +| a31d1dc7 | Fix Terminals and Loggin | | | | | +| 26709b93 | Preparing for Sweeps | | | | | +| 04dd0167 | Rewards Fixed - Sweepable | | | | 6 obs schemes, default=0 | +| 04d_obs0 | ^ scheme 0 (size 19) | 3.4 | 0.93 | 41 | **GOOD BASELINE** | +| 04d_obs1 | ^ scheme 1 (size 21) | | | | | +| 04d_obs2 | ^ scheme 2 (size 12) | | | | | +| 04d_obs3 | ^ scheme 3 (size 17) | | | | | +| 04d_obs4 | ^ scheme 4 (size 10) | | | | | +| 04d_obs5 | ^ scheme 5 (size 43) | | | | | +| 63a7aaed | Observation Schemas Swept | | | | | +| 0a1c2e6d | Weighted Random Actions | | | | | +| 80bcf31e | Vectorized Autopilot | | | | | +| 85980679 | Autopilot Seperate File | | | | | +| 374871df | Small Perf - Move cosf Out of Loop | | | | | +| 1131e836 | Simple Optimizations | | | | | +| 1c30c546 | Coordinated Turn Tests | | | | | +| 3582d2d4 | Physics in Own File - Test Flights | | | | | +| 95eb2efd | Moved Physics to File | | | | | +| b29bf5ac | Renamed md Files | | | 
| | +| 0116b97c | Physics model: incidence, comments, test suite | | | | | +| 332a9ae0 | Good Claude - Wireframe Planes | | | | | +| daaf9024 | Rendered with spheres or something | | | | | +| 49af2d49 | Reward Changes | | | | | +| a4172661 | Trains and Evals | 0 | N/A | -43 | no kills tracking yet | + +--- + +## Commit Summaries + +Run `git diff HASH~1 HASH` to see what changed. Summarize each commit as you test it. Keep in chronological order (newest first). + +### 4e640ee0 +(summarize after reviewing diff) + +### f6c821d7 +(summarize after reviewing diff) + +### 2c3073f5 +(summarize after reviewing diff) + +### b68d1b22 +(summarize after reviewing diff) + +### 9dca5c67 +(summarize after reviewing diff) + +### 7fd88f1c +(summarize after reviewing diff) + +### 30fa9fed +(summarize after reviewing diff) + +### 652ab7a6 +(summarize after reviewing diff) + +### fe7e26a2 +(summarize after reviewing diff) + +### bc728368 +(summarize after reviewing diff) + +### 2606e20e +(summarize after reviewing diff) + +### 17f18c19 +(summarize after reviewing diff) + +### 3cc5b588 +(summarize after reviewing diff) + +### a31d1dc7 +(summarize after reviewing diff) + +### 26709b93 +(summarize after reviewing diff) + +### 04dd0167 +Rewards Fixed - Sweepable. Proper kills/perf tracking. perf=0.93 (93% episodes get ≥1 kill), kills=3.4 avg, return=41. **GOOD BASELINE - training works here.** + +### 63a7aaed +(summarize after reviewing diff) + +### 0a1c2e6d +(summarize after reviewing diff) + +### 80bcf31e +(summarize after reviewing diff) + +### 85980679 +(summarize after reviewing diff) + +### 374871df +(summarize after reviewing diff) + +### 1131e836 +(summarize after reviewing diff) + +### 1c30c546 +(summarize after reviewing diff) + +### 3582d2d4 +(summarize after reviewing diff) + +### 95eb2efd +(summarize after reviewing diff) + +### b29bf5ac +(summarize after reviewing diff) + +### 0116b97c +(summarize after reviewing diff) + +### 332a9ae0 +(summarize after reviewing diff) + +### daaf9024 +(summarize after reviewing diff) + +### 49af2d49 +(summarize after reviewing diff) + +### a4172661 +Initial dogfight commit. Creates entire environment from scratch. No kills/perf tracking - only logs episode_return, episode_length, n. Returns: -43.9, -109.6, -57.4 (avg -70). Cannot use as baseline for kills metric. + +--- + +## Findings + +### Possible Cause: `episodes_per_stage` Config Change + +| Commit | episodes_per_stage | perf | +|--------|-------------------|------| +| 30fa9fed (GOOD) | 15000 | 0.94 | +| 7fd88f1c (BAD) | 60 | 0.02 | +| 7fd_edit (PARTIAL) | 100000 | 0.76 | +| b68d1b22 (BAD) | 120 | 0.35 | + +**Finding**: `episodes_per_stage` is a MAJOR factor (0.02 → 0.76 when increased), but code changes in 7fd88f1c also matter (0.76 vs 1.0 in last good commit). + +### First Bad Commit: 7fd88f1c + +**Config changes** (ab222bfc → 7fd88f1c): +- `episodes_per_stage`: 15000 → 60 (250x fewer!) +- `penalty_roll`: 0.0015 → 0.003 (2x) +- NEW: `penalty_aileron = 0.1` +- NEW: `penalty_bias = 0.01` +- NEW: `reward_approach = 0.005` +- NEW: `reward_level = 0.02` +- Swapped reward_firing_solution ↔ reward_tracking + +**Code changes**: 454 insertions, 101 deletions in dogfight.h, autopilot.h, etc. 
+ +### All Obs Schemes at 7fd88f1c (eps_per_stage=100000) + +| Scheme | Size | r1 | r2 | r3 | Avg Perf | Notes | +|--------|------|-----|-----|-----|----------|-------| +| 0 | 12 | 0.62 | 0.63 | 0.62 | 0.62 | consistent | +| 1 | 17 | 0.64 | 0.63 | 0.84 | 0.71 | high variance | +| 2 | 10 | 0.80 | 0.99 | 0.64 | 0.81 | r2 nearly perfect! | +| 3 | 10 | 0.70 | 0.65 | 0.30 | 0.55 | WORST scheme | +| 4 | 13 | 0.66 | 0.58 | 0.62 | 0.62 | consistent | +| 5 | 15 | 0.70 | 0.67 | 0.96 | 0.78 | high variance | + +**Key Finding**: ALL obs schemes underperform vs last good commit (1.0). This is NOT just one bad scheme - it's a deeper code problem in 7fd88f1c. + +Observations: +- High variance across runs for all schemes +- Scheme 2 had one run hit 0.99, scheme 5 hit 0.96 - so 1.0 is achievable but inconsistent +- Scheme 3 is worst (0.55 avg) +- Problem affects all observation schemes equally + +### To Investigate + +1. ~~Test 7fd88f1c with `episodes_per_stage=100000` to isolate config vs code~~ **DONE**: perf varies by scheme, 0.55-0.81 +2. ~~Test all obs schemes at 7fd88f1c~~ **DONE**: ALL schemes underperform, not one bad scheme +3. Diff dogfight.h code between ab222bfc and 7fd88f1c to find the breaking change +4. Focus on: new penalties (aileron=0.1, bias=0.01), new rewards (approach, level), or code logic bugs diff --git a/pufferlib/ocean/dogfight/OBSERVATION_EXPERIMENTS.md b/pufferlib/ocean/dogfight/OBSERVATION_EXPERIMENTS.md new file mode 100644 index 000000000..3b62395a7 --- /dev/null +++ b/pufferlib/ocean/dogfight/OBSERVATION_EXPERIMENTS.md @@ -0,0 +1,452 @@ +# Observation Space Experiments + +Design doc for systematic observation scheme comparison. + +**Goal**: Empirically determine the best observation representation for dogfight learning. + +**Method**: PufferLib sweep across observation schemes, all else constant. + +--- + +## Observation Scheme Candidates + +### Scheme 0: CURRENT (Baseline) +World-frame Cartesian. + +``` +Player: pos(3), vel(3), quat(4), up(3) = 13 +Relative: pos(3), vel(3) = 6 +Total: 19 +``` + +Issues: +- Rel pos/vel in world frame - agent must learn quaternion rotation +- No direct aiming info + +--- + +### Scheme 1: BODY_FRAME +Transform relative quantities to player body frame. + +``` +Player: pos(3), vel_body(3), quat(4), up(3) = 13 +Relative: pos_body(3), vel_body(3) = 6 +Aiming: aim_dot(1), dist_norm(1) = 2 +Total: 21 +``` + +Computation: +```c +Quat q_inv = quat_inverse(p->ori); +Vec3 rel_pos_body = quat_rotate(q_inv, sub3(o->pos, p->pos)); +Vec3 rel_vel_body = quat_rotate(q_inv, sub3(o->vel, p->vel)); +Vec3 vel_body = quat_rotate(q_inv, p->vel); +``` + +Benefit: `rel_pos_body.x > 0` always means "ahead of me" + +--- + +### Scheme 2: ANGLES +Spherical coordinates - azimuth, elevation, distance. + +``` +Player: pos(3), speed(1), pitch(1), roll(1), yaw(1) = 7 +Target: azimuth(1), elevation(1), distance(1), closing_rate(1) = 4 +Opponent: heading_rel(1) = 1 +Total: 13 +``` + +Computation: +```c +// Target angles in body frame +Vec3 to_target_body = quat_rotate(q_inv, rel_pos); +float azimuth = atan2f(to_target_body.y, to_target_body.x); // -pi to pi +float elevation = asinf(to_target_body.z / norm3(to_target_body)); // -pi/2 to pi/2 +float distance = norm3(rel_pos); + +// Player attitude (Euler from quaternion) +// pitch = asin(2*(qw*qy - qz*qx)) +// roll = atan2(2*(qw*qx + qy*qz), 1 - 2*(qx*qx + qy*qy)) +// yaw = atan2(2*(qw*qz + qx*qy), 1 - 2*(qy*qy + qz*qz)) +``` + +Benefit: Directly answers "how far off am I pointing?" 
+Risk: Discontinuities at ±180° azimuth, ±90° elevation + +--- + +### Scheme 3: CONTROL_ERROR +What control inputs would point at target? + +``` +Player: pos(3), speed(1), quat(4), up(3) = 11 +Aiming: pitch_error(1), yaw_error(1), roll_to_turn(1), dist(1) = 4 +Target: closing_rate(1), opp_heading_rel(1) = 2 +Total: 17 +``` + +Computation: +```c +// Error = angle from nose to target +Vec3 to_target_body = quat_rotate(q_inv, rel_pos); +Vec3 to_target_norm = normalize3(to_target_body); + +// Pitch error: positive = need to pitch up +float pitch_error = asinf(to_target_norm.z); + +// Yaw error: positive = need to yaw right +float yaw_error = atan2f(to_target_norm.y, to_target_norm.x); + +// Roll to align turn: if target is right, roll right helps +float roll_to_turn = ... // bank angle that would help turn toward target +``` + +Benefit: Minimal transformation from observation to action +Risk: May lose situational awareness info + +--- + +### Scheme 4: REALISTIC (Cockpit Instruments) +Only what a WW2 pilot could actually see/read. + +``` +Instruments: airspeed(1), altitude(1), pitch(1), roll(1) = 4 +Gunsight: target_az(1), target_el(1), target_size(1) = 3 +Visual: target_aspect(1), horizon_visible(1) = 2 +Total: 10 +``` + +Benefit: Most constrained - if this works, simpler is better +Risk: May lack critical information + +--- + +### Scheme 5: MAXIMALIST +Everything potentially useful, let network sort it out. + +``` +Player: pos(3), vel_world(3), vel_body(3), quat(4), up(3), + speed(1), pitch(1), roll(1), yaw(1) = 20 +Target: rel_pos_world(3), rel_pos_body(3), rel_vel_world(3), rel_vel_body(3), + azimuth(1), elevation(1), distance(1), aim_dot(1), closing_rate(1) = 17 +Opponent: opp_fwd_world(3), opp_fwd_body(3) = 6 +Total: 43 +``` + +Benefit: Network has all possible information +Risk: Larger network needed, slower training, redundancy + +--- + +## Implementation Strategy + +### Option A: Compile-Time Schemes +```c +#define OBS_SCHEME 1 // 0=CURRENT, 1=BODY_FRAME, 2=ANGLES, etc. + +#if OBS_SCHEME == 0 +#define OBS_SIZE 19 +#elif OBS_SCHEME == 1 +#define OBS_SIZE 21 +// ... +#endif + +void compute_observations(Dogfight *env) { +#if OBS_SCHEME == 0 + // Current implementation +#elif OBS_SCHEME == 1 + // Body frame implementation +#endif +} +``` + +Pro: No runtime overhead +Con: Requires recompilation for each scheme + +### Option B: Runtime Config +```c +typedef enum { + OBS_CURRENT = 0, + OBS_BODY_FRAME, + OBS_ANGLES, + OBS_CONTROL_ERROR, + OBS_REALISTIC, + OBS_MAXIMALIST +} ObsScheme; + +// In Dogfight struct +ObsScheme obs_scheme; +int obs_size; // Set at init based on scheme +``` + +Pro: Single binary, config-driven sweeps +Con: Slight runtime overhead, more complex code + +### Recommendation: Option B (Runtime Config) + +Enables PufferLib sweep integration without recompilation. 
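+
+A minimal sketch of the Option B dispatch (scheme sizes taken from the results table below; the per-scheme function names for schemes 2-5 are illustrative, only the body-frame and angles versions are sketched later in this doc):
+
+```c
+static const int OBS_SIZES[6] = {19, 21, 12, 17, 10, 43};
+
+void compute_observations(Dogfight *env) {
+    switch (env->obs_scheme) {
+        case OBS_CURRENT:       compute_observations_current(env);       break;
+        case OBS_BODY_FRAME:    compute_observations_body_frame(env);    break;
+        case OBS_ANGLES:        compute_observations_angles(env);        break;
+        case OBS_CONTROL_ERROR: compute_observations_control_error(env); break;
+        case OBS_REALISTIC:     compute_observations_realistic(env);     break;
+        case OBS_MAXIMALIST:    compute_observations_maximalist(env);    break;
+    }
+}
+```
+
+`env->obs_size = OBS_SIZES[env->obs_scheme]` is set once at init, mirroring the Python-side lookup table in the sweep integration section below.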
+ +--- + +## PufferLib Sweep Integration + +### INI Config Addition +```ini +[dogfight] +obs_scheme = 1 # 0-5, maps to enum +``` + +### Sweep Config +```yaml +# sweeps/dogfight_obs.yaml +base_config: puffer_dogfight +sweep: + obs_scheme: [0, 1, 2, 3, 4, 5] + +# Or focused comparison: +sweep: + obs_scheme: [0, 1, 2] # Current vs Body Frame vs Angles +``` + +### Python Integration +```python +# dogfight.py +def __init__(self, ..., obs_scheme=0): + self.obs_scheme = obs_scheme + self.obs_size = OBS_SIZES[obs_scheme] # Lookup table + + self.observation_space = gymnasium.spaces.Box( + low=-1, high=1, + shape=(self.obs_size,), + dtype=np.float32 + ) +``` + +--- + +## Experimental Protocol + +### Phase 1: Focused Comparison (Recommended First) +Compare 3 most promising schemes: +- Scheme 0 (CURRENT) - baseline +- Scheme 1 (BODY_FRAME) - expected best +- Scheme 2 (ANGLES) - alternative representation + +Run: 3 schemes × 3 seeds × 100M steps = 9 runs + +### Phase 2: Extended Comparison +If Phase 1 inconclusive, add: +- Scheme 3 (CONTROL_ERROR) +- Scheme 5 (MAXIMALIST) + +### Phase 3: Ablation Studies +On best scheme, ablate individual observations: +- Remove aim_dot: does it hurt? +- Remove opponent heading: does it matter? +- etc. + +--- + +## Metrics + +Primary: +- Episode return (mean, std over 3 seeds) +- Kills per episode +- Accuracy (hits/shots) + +Secondary: +- Learning speed (steps to reach threshold performance) +- Final policy variance +- Behavioral analysis (does agent pursue? aim? fire appropriately?) + +--- + +## Expected Outcomes (Pre-Experiment Hypotheses) + +| Scheme | Hypothesis | +|--------|------------| +| 0 CURRENT | Baseline - agent struggles with aiming | +| 1 BODY_FRAME | **Best** - easiest for network to learn spatial relationships | +| 2 ANGLES | Good for aiming, may have discontinuity issues | +| 3 CONTROL_ERROR | Fast initial learning, may plateau | +| 4 REALISTIC | Insufficient information | +| 5 MAXIMALIST | Works but slower, needs bigger network | + +--- + +## Actual Results (2026-01-14) + +| Scheme | Obs | Return | Kills | Combat? | Notes | +|--------|-----|--------|-------|---------|-------| +| 0 WORLD_FRAME | 19 | +30.30 | **0.30** | **YES** | Best for combat | +| 1 BODY_FRAME | 21 | +22.27 | 0.12 | Weak | Worse than baseline | +| 2 ANGLES | 12 | +128.90 | 0.11 | No | Exploits pursuit reward | +| 3 CONTROL_ERROR | 17 | +166.40 | 0.00 | No | Exploits pursuit reward | +| 4 REALISTIC | 10 | +168.39 | 0.00 | No | Exploits pursuit reward | +| 5 MAXIMALIST | 43 | +83.39 | 0.13 | 1/3 | High variance | + +**Key Finding:** WORLD_FRAME (scheme 0) is the only scheme where all 3 runs consistently learned combat. The "engineered" observation schemes (2-4) achieved very high return by exploiting pursuit reward shaping without learning to fire. The hypothesis that BODY_FRAME would be best was **wrong** - the network actually learns combat better with world-frame observations. + +**Insight:** The pursuit reward shaping is too strong. Agents can maximize return by chasing without ever shooting. World-frame observations may make this exploitation harder because the spatial relationships aren't pre-computed for the agent. 
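+
+One possible mitigation, not implemented here: cap the cumulative shaping reward per episode so that chasing alone can never outscore combat. A rough sketch, assuming a new `shaping_total` accumulator on the Dogfight struct and using the Phase 5 kill reward of +10 as the yardstick (the per-step term names are illustrative):
+
+```c
+// Per-step shaping (closing / tracking / tail terms), capped per episode.
+float shaping = closing_reward + tracking_reward + tail_reward;
+float cap = 5.0f;  // half of one kill (+10.0), so kills must dominate the return
+if (env->shaping_total + shaping > cap) {
+    shaping = fmaxf(0.0f, cap - env->shaping_total);
+}
+env->shaping_total += shaping;  // reset to 0 in c_reset()
+reward += shaping;
+```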
+ +--- + +## Implementation Checklist + +- [x] Add `obs_scheme` to Dogfight struct +- [x] Add `obs_scheme` to INI config parsing +- [x] Implement OBS_SIZE lookup table +- [x] Implement compute_observations_scheme_X() for each scheme (all 6) +- [x] Update Python observation_space based on scheme +- [x] Create sweep config file +- [x] Run Phase 1 experiments (all 6 schemes, 3 runs each) +- [x] Analyze results +- [x] Document findings in BASELINE_SUMMARY.md + +--- + +## Code Sketch: Body Frame Implementation + +```c +void compute_observations_body_frame(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + // Inverse quaternion for world->body transforms + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player state + Vec3 vel_body = quat_rotate(q_inv, p->vel); + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + + // Relative state in body frame + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_vel = sub3(o->vel, p->vel); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + Vec3 rel_vel_body = quat_rotate(q_inv, rel_vel); + + // Aiming info + float dist = norm3(rel_pos); + Vec3 fwd = vec3(1, 0, 0); // In body frame, forward is always +X + Vec3 to_target = normalize3(rel_pos_body); + float aim_dot = dot3(to_target, fwd); // 1.0 = perfect aim + + int i = 0; + // Player position (world frame - needed for bounds awareness) + env->observations[i++] = p->pos.x * INV_WORLD_HALF_X; + env->observations[i++] = p->pos.y * INV_WORLD_HALF_Y; + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + + // Player velocity (body frame) + env->observations[i++] = vel_body.x * INV_MAX_SPEED; // Forward speed + env->observations[i++] = vel_body.y * INV_MAX_SPEED; // Sideslip + env->observations[i++] = vel_body.z * INV_MAX_SPEED; // Climb rate + + // Player orientation + env->observations[i++] = p->ori.w; + env->observations[i++] = p->ori.x; + env->observations[i++] = p->ori.y; + env->observations[i++] = p->ori.z; + + // Player up vector (world frame) + env->observations[i++] = up.x; + env->observations[i++] = up.y; + env->observations[i++] = up.z; + + // Relative position (body frame) - THE KEY CHANGE + env->observations[i++] = rel_pos_body.x * INV_WORLD_HALF_X; + env->observations[i++] = rel_pos_body.y * INV_WORLD_HALF_Y; + env->observations[i++] = rel_pos_body.z * INV_WORLD_MAX_Z; + + // Relative velocity (body frame) + env->observations[i++] = rel_vel_body.x * INV_MAX_SPEED; + env->observations[i++] = rel_vel_body.y * INV_MAX_SPEED; + env->observations[i++] = rel_vel_body.z * INV_MAX_SPEED; + + // Aiming helpers + env->observations[i++] = aim_dot; // -1 to 1, 1 = perfect aim + env->observations[i++] = dist / GUN_RANGE; // ~0-4, 1 = at gun range + + // OBS_SIZE = 21 for this scheme +} +``` + +--- + +## Code Sketch: Angles Implementation + +```c +void compute_observations_angles(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles from quaternion + float pitch = asinf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + float yaw = atan2f(2.0f * (p->ori.w * p->ori.z + p->ori.x * p->ori.y), + 1.0f - 2.0f * (p->ori.y * p->ori.y + p->ori.z * p->ori.z)); + + // Target in body frame -> spherical + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float azimuth = 
atan2f(rel_pos_body.y, rel_pos_body.x); // -pi to pi + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float elevation = atan2f(rel_pos_body.z, r_horiz); // -pi/2 to pi/2 + + // Closing rate + Vec3 rel_vel = sub3(p->vel, o->vel); // Note: p - o for closing + float closing_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Opponent heading relative to player + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 opp_fwd_body = quat_rotate(q_inv, opp_fwd); + float opp_heading_rel = atan2f(opp_fwd_body.y, opp_fwd_body.x); + + int i = 0; + // Player state + env->observations[i++] = p->pos.x * INV_WORLD_HALF_X; + env->observations[i++] = p->pos.y * INV_WORLD_HALF_Y; + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + env->observations[i++] = norm3(p->vel) * INV_MAX_SPEED; // Speed scalar + env->observations[i++] = pitch / PI; // -0.5 to 0.5 + env->observations[i++] = roll / PI; // -1 to 1 + env->observations[i++] = yaw / PI; // -1 to 1 (or omit - world symmetric) + + // Target angles + env->observations[i++] = azimuth / PI; // -1 to 1 + env->observations[i++] = elevation / (PI * 0.5f); // -1 to 1 + env->observations[i++] = dist / (GUN_RANGE * 2.0f); // ~0-2 + env->observations[i++] = closing_rate * INV_MAX_SPEED; + + // Opponent info + env->observations[i++] = opp_heading_rel / PI; // -1 to 1 + + // OBS_SIZE = 12 for this scheme +} +``` + +--- + +## Notes + +- Angle normalization: divide by PI to get [-1, 1] range +- Discontinuity handling: atan2 handles ±180° gracefully, but crossing still causes jump +- Alternative: use sin/cos of angles instead of raw angles (no discontinuity) + - `sin(azimuth), cos(azimuth)` instead of `azimuth` + - Doubles the observation count but removes discontinuities + +--- + +## References + +- drone_race.h: Body-frame transform pattern (lines 79-85) +- TRAINING_IMPROVEMENTS.md: Original observation improvement ideas +- **REALISTIC_SCHEMES.md**: Proposed schemes 6-9 (MINIMAL, GUNSIGHT, ENERGY, PLUS) for Aces High III transfer - tests minimal observations needed for dogfighting +- aceshigh/DLL_SPEC.md: How observation choice affects DLL data requirements +- PufferLib sweep docs: (link to docs if available) diff --git a/pufferlib/ocean/dogfight/P51d_REFERENCE_DATA.md b/pufferlib/ocean/dogfight/P51d_REFERENCE_DATA.md new file mode 100644 index 000000000..d63910193 --- /dev/null +++ b/pufferlib/ocean/dogfight/P51d_REFERENCE_DATA.md @@ -0,0 +1,815 @@ +# P-51D Mustang Reference Data for RL Simulation Validation + +## Document Purpose + +This document provides authoritative reference data for the P-51D Mustang to enable validation of a simplified RL flight simulation. The goal is NOT perfect simulation fidelity, but rather reasonable agreement with historical performance data to ensure the RL environment conveys realistic WW2 fighter dynamics. + +**Key Philosophy**: Run automated test scripts that "hijack" the policy (e.g., "maintain level flight at 100% throttle") and compare simulated performance against these reference values. + +--- + +## 1. 
STANDARD TEST CONDITION + +For all validation tests, use this standardized configuration: + +| Parameter | Value | Notes | +|-----------|-------|-------| +| **Weight** | 9,000 lb (4,082 kg) | ~25% internal fuel (~45 gal remaining) | +| **Fuel Load** | 45 US gal | From 180 gal full internal | +| **Altitude** | Sea level (unless noted) | ISA conditions | +| **Configuration** | Clean | No external stores, gear up | +| **Power Setting** | As specified per test | | + +**Why 9,000 lb?** This represents a combat-ready fighter after burning fuel en route to engagement: +- Empty weight: 7,635 lb +- Pilot + equipment: ~200 lb +- Ammo (full): ~330 lb +- Oil: ~90 lb +- 45 gal fuel: ~270 lb +- Misc: ~475 lb +- **Total: ~9,000 lb** + +Historical reference: NAA report NA-46-130 uses 9,611 lb (full 180 gal internal fuel). + +--- + +## 2. PHYSICAL DIMENSIONS + +### 2.1 Overall Dimensions + +| Parameter | Imperial | Metric | +|-----------|----------|--------| +| Length | 32.25 ft | 9.83 m | +| Wingspan | 37.0 ft | 11.28 m | +| Height (tail down) | 13.67 ft | 4.17 m | +| Wing Area | 233 ft² | 21.65 m² | + +### 2.2 Wing Geometry + +| Parameter | Value | Notes | +|-----------|-------|-------| +| Aspect Ratio | 5.86 | AR = b²/S = 37²/233 | +| Mean Aerodynamic Chord (MAC) | 6.63 ft | 2.02 m | +| Root Chord | 8.48 ft | 2.58 m | +| Tip Chord | 3.87 ft | 1.18 m | +| Taper Ratio | 0.456 | λ = c_tip/c_root | +| **Wing Incidence** | **+1° to +2°** | Root chord to fuselage datum | +| Washout (twist) | 2-3° | Tip relative to root (reduces tip stall) | +| Dihedral | ~5° | | + +### 2.3 Control Surfaces + +| Surface | Span | Chord (% wing) | Area | +|---------|------|----------------|------| +| Aileron | 8.5 ft each | ~25% | ~9 ft² each | +| Flap | ~5.5 ft each | ~25% | ~12 ft² each | + +### 2.4 Tail Geometry + +| Parameter | Horizontal Stabilizer | Vertical Stabilizer | +|-----------|----------------------|---------------------| +| Area | 45.4 ft² | 14.8 ft² | +| Span | 13.1 ft | 4.7 ft | +| Root Chord | 4.6 ft | 4.7 ft | +| Tip Chord | 2.3 ft | 1.6 ft | + +--- + +## 3. 
AERODYNAMIC DATA + +### 3.1 Airfoil + +**Profile**: NAA/NACA 45-100 (laminar flow) +- Root: 15.1% thick, 1.6% camber at 50% chord +- Tip: 11.4% thick, 1.6% camber at 50% chord +- Max thickness at ~39% chord (laminar flow design) + +### 3.2 Lift Characteristics + +| Parameter | Value | Source/Notes | +|-----------|-------|--------------| +| **C_L_α (lift curve slope)** | 0.095-0.10 /deg | ~5.4-5.7 /rad; 3D wing | +| **α₀ (zero-lift angle)** | **-1.0° to -1.5°** | Due to 1.6% camber | +| **C_L_max (clean)** | 1.45 - 1.50 | Flaps up | +| **C_L_max (flaps 50°)** | 1.80 - 1.90 | Landing configuration | +| **α_stall (clean)** | 19.1° | From IL-2 data | +| **α_stall (flaps)** | 16.3° | Landing configuration | + +**Lift Equation**: +``` +C_L = C_L_α × (α - α₀) +C_L = 0.097 × (α + 1.2°) [deg input] +C_L = 5.56 × (α + 0.021) [rad input, α₀ in rad] +``` + +### 3.3 Drag Characteristics + +| Parameter | Value | Source/Notes | +|-----------|-------|--------------| +| **C_D0 (zero-lift drag)** | 0.0163 | Published Wikipedia/museum data | +| **Oswald Efficiency (e)** | 0.75 - 0.80 | Typical for tapered wing | +| **K (induced drag factor)** | 0.072 | K = 1/(π × AR × e) = 1/(π × 5.86 × 0.75) | + +**Drag Polar**: +``` +C_D = C_D0 + K × C_L² +C_D = 0.0163 + 0.072 × C_L² +``` + +**Drag Area**: 3.80 ft² (0.35 m²) + +### 3.4 Lift-to-Drag Ratio + +| Condition | L/D | Notes | +|-----------|-----|-------| +| Maximum L/D | 14.6 | At optimal C_L | +| Cruise (~350 mph) | ~12-13 | | +| Combat maneuvering | 6-10 | Higher C_L, more induced drag | + +**Optimal L/D occurs at**: +``` +C_L_opt = sqrt(C_D0 / K) = sqrt(0.0163 / 0.072) = 0.476 +L/D_max = 1 / (2 × sqrt(C_D0 × K)) = 1 / (2 × sqrt(0.0163 × 0.072)) = 14.6 +``` + +--- + +## 4. PROPULSION DATA + +### 4.1 Engine: Packard V-1650-7 (Merlin 66) + +| Parameter | Value | +|-----------|-------| +| Type | Liquid-cooled V-12, 2-stage 2-speed supercharger | +| Displacement | 1,647 in³ (27 L) | +| Bore × Stroke | 5.4" × 6.0" | +| Compression Ratio | 6.0:1 | +| Propeller Gear Ratio | 0.479:1 | + +### 4.2 Power Settings + +| Rating | MAP (in Hg) | RPM | BHP (SL) | Time Limit | +|--------|-------------|-----|----------|------------| +| **Military** | 61" | 3,000 | 1,490 hp | 15 min | +| **WEP (67")** | 67" | 3,000 | 1,650 hp | 5 min | +| **WEP (150 grade)** | 75" | 3,000 | 1,860 hp | 5 min | +| Cruise | 46" | 2,700 | ~1,100 hp | Unlimited | + +### 4.3 Power vs Altitude (Military Power, 61" Hg) + +| Altitude (ft) | BHP | Notes | +|---------------|-----|-------| +| Sea Level | 1,490 | Low blower | +| 2,600 | 1,580 | Peak low blower | +| 6,400 | 1,400 | High blower | +| 9,700 | ~1,400 | Near optimal high blower | +| 15,000 | ~1,350 | | +| 25,000 | ~1,100 | | +| 35,000 | ~750 | | + +**Simplified Altitude Correction** (for sim): +```python +def get_power(P_rated, altitude_ft, throttle=1.0): + """Simplified Merlin power model""" + sigma = get_density_ratio(altitude_ft) + + # Two-stage supercharger approximation + if altitude_ft < 4000: + # Low blower - slight power increase + P_available = P_rated * min(1.0, sigma * 1.05) + elif altitude_ft < 10000: + # Low blower critical altitude region + P_available = P_rated * 1.0 + elif altitude_ft < 25000: + # High blower + P_available = P_rated * (0.95 - 0.01 * (altitude_ft - 10000) / 1000) + else: + # Above critical altitude, power drops + P_available = P_rated * sigma * 1.2 + + return P_available * throttle +``` + +### 4.4 Propeller + +| Parameter | Value | +|-----------|-------| +| Type | Hamilton Standard 24D50 constant-speed | +| Diameter | 
11 ft 2 in (3.40 m) | +| Blades | 4 | +| Propeller RPM at 3000 engine RPM | 1,437 | +| **Propeller Efficiency (cruise)** | 0.80 - 0.85 | +| **Propeller Efficiency (climb)** | 0.75 - 0.80 | +| **Propeller Efficiency (static)** | 0.50 - 0.60 | + +**Thrust Calculation**: +```python +def get_thrust(power_hp, velocity_fps, eta_prop=0.80): + """ + T = η_p × P / V + + power_hp: shaft horsepower + velocity_fps: true airspeed in ft/s + eta_prop: propeller efficiency (0.50-0.85 depending on V) + returns: thrust in lbf + """ + power_ftlb_s = power_hp * 550 # Convert HP to ft⋅lb/s + + if velocity_fps < 50: # Low speed / static + # Use momentum theory approximation + eta_prop = 0.55 + velocity_fps = max(velocity_fps, 30) # Avoid division by zero + + thrust_lbf = eta_prop * power_ftlb_s / velocity_fps + return thrust_lbf +``` + +--- + +## 5. PERFORMANCE VALIDATION TARGETS + +### 5.1 Level Flight - Maximum Speed + +**Test**: Set throttle to specified power, maintain level flight (γ = 0), record stabilized TAS. + +| Condition | Power | Altitude | Target Speed | Tolerance | +|-----------|-------|----------|--------------|-----------| +| WEP | 67" Hg | Sea Level | 368 mph (592 km/h) | ±10 mph | +| Military | 61" Hg | Sea Level | 355 mph (571 km/h) | ±10 mph | +| WEP | 67" Hg | 11,300 ft | 414 mph (666 km/h) | ±10 mph | +| Military | 61" Hg | 13,300 ft | 412 mph (663 km/h) | ±10 mph | +| WEP | 67" Hg | 24,500 ft | 440 mph (708 km/h) | ±10 mph | +| Military | 61" Hg | 26,200 ft | 435 mph (700 km/h) | ±10 mph | + +**Test Script Logic**: +```python +def test_max_speed(sim, altitude_ft, power_setting): + """ + 1. Initialize at altitude, moderate speed + 2. Set throttle to power_setting + 3. Command pitch to maintain level (gamma = 0) + 4. Run until speed stabilizes (d|V|/dt < 0.1 ft/s²) + 5. Record stabilized TAS + """ + sim.reset(altitude=altitude_ft, speed_fps=400) + sim.set_throttle(power_setting) + + for step in range(10000): # Max 200 seconds at 50 Hz + # Simple level-flight controller + gamma = sim.get_flight_path_angle() + pitch_cmd = -gamma * 2.0 # P controller + sim.set_pitch_rate_cmd(pitch_cmd) + + sim.step() + + if abs(sim.get_acceleration()) < 0.1: + return sim.get_TAS_mph() + + return sim.get_TAS_mph() # Return final value +``` + +### 5.2 Stall Speed + +**Test**: At constant altitude, gradually reduce power while maintaining level flight until stall. + +| Weight (lb) | Configuration | Target Stall Speed | Notes | +|-------------|---------------|-------------------|-------| +| 9,071 | Clean | ~100 mph (161 km/h) IAS | | +| 9,071 | Flaps down | 95.4 mph (154 km/h) IAS | NAA data | +| 9,000 | Clean | 99 mph (159 km/h) IAS | Test weight | +| 10,000 | Clean | 104 mph (168 km/h) IAS | Heavier | + +**Stall Speed Formula**: +```python +def calculate_stall_speed(weight_lb, rho_slugft3, wing_area_ft2, CL_max): + """ + V_stall = sqrt(2W / (ρ × S × CL_max)) + + At sea level (ρ = 0.002377 slug/ft³), S = 233 ft², CL_max = 1.48: + V_stall = sqrt(2 × 9000 / (0.002377 × 233 × 1.48)) + V_stall = sqrt(18000 / 0.820) = sqrt(21951) = 148 ft/s = 101 mph + """ + V_stall_fps = math.sqrt(2 * weight_lb / (rho_slugft3 * wing_area_ft2 * CL_max)) + return V_stall_fps * 0.6818 # Convert to mph +``` + +### 5.3 Rate of Climb + +**Test**: Set throttle to specified power, maintain V_y (best climb speed ~165 mph IAS), record climb rate. 
+ +| Power | Altitude | Target ROC | Tolerance | +|-------|----------|------------|-----------| +| WEP (67") | Sea Level | 3,410 ft/min | ±200 ft/min | +| Military (61") | Sea Level | 3,030 ft/min | ±200 ft/min | +| WEP | 7,500 ft | 3,510 ft/min | ±200 ft/min | +| WEP | 21,200 ft | 2,680 ft/min | ±200 ft/min | +| Military | 9,700 ft | 3,170 ft/min | ±200 ft/min | +| Military | 23,200 ft | 2,300 ft/min | ±200 ft/min | + +**Rate of Climb Formula** (steady): +```python +def calculate_ROC(thrust_lb, drag_lb, velocity_fps, weight_lb): + """ + ROC = (T - D) × V / W = V × sin(γ) + + Or: ROC = (P_available × η_p - P_required) / W + """ + excess_thrust = thrust_lb - drag_lb + sin_gamma = excess_thrust / weight_lb + ROC_fps = velocity_fps * sin_gamma + return ROC_fps * 60 # Convert to ft/min +``` + +**Test Script**: +```python +def test_climb_rate(sim, altitude_ft, power_setting, climb_speed_mph=165): + """ + 1. Initialize at altitude, at Vy + 2. Set throttle, maintain constant IAS + 3. Record stabilized climb rate + """ + V_target_fps = climb_speed_mph * 1.467 + sim.reset(altitude=altitude_ft, speed_fps=V_target_fps) + sim.set_throttle(power_setting) + + for step in range(5000): + # Maintain constant airspeed in climb + V_error = V_target_fps - sim.get_TAS_fps() + pitch_cmd = -V_error * 0.1 # Speed-hold via pitch + sim.set_pitch_rate_cmd(pitch_cmd) + sim.step() + + return sim.get_climb_rate_fpm() +``` + +### 5.4 Level Turn Performance + +**Test**: Maintain altitude and constant speed in coordinated turn, measure turn rate and radius. + +| Speed (IAS) | Load Factor | Turn Rate | Turn Radius | Turn Time | +|-------------|-------------|-----------|-------------|-----------| +| 180 mph (290 km/h) | ~3.5g | 18°/s | ~800 ft | 20 sec | +| 250 mph | ~2.5g | 10°/s | ~1,500 ft | 36 sec | +| 350 mph | ~2.0g | 5°/s | ~3,500 ft | 72 sec | + +**Turn Physics**: +```python +def calculate_turn(velocity_fps, load_factor, g=32.174): + """ + In a level turn: + L = n × W (lift = load factor × weight) + L_vertical = W (to maintain altitude) + L_horizontal = W × sqrt(n² - 1) (centripetal force) + + Turn radius: R = V² / (g × sqrt(n² - 1)) + Turn rate: ω = g × sqrt(n² - 1) / V [rad/s] + """ + sqrt_term = math.sqrt(load_factor**2 - 1) + + radius_ft = velocity_fps**2 / (g * sqrt_term) + turn_rate_rad_s = g * sqrt_term / velocity_fps + turn_rate_deg_s = math.degrees(turn_rate_rad_s) + turn_time_sec = 360 / turn_rate_deg_s + + return { + 'radius_ft': radius_ft, + 'turn_rate_deg_s': turn_rate_deg_s, + 'turn_time_sec': turn_time_sec + } + +# Example: 290 km/h (180 mph = 264 ft/s) at 3.5g +# sqrt(3.5² - 1) = sqrt(11.25) = 3.35 +# R = 264² / (32.17 × 3.35) = 69696 / 107.8 = 647 ft +# ω = 32.17 × 3.35 / 264 = 0.408 rad/s = 23.4°/s +``` + +### 5.5 Level Flight at Zero AoA + +**Critical Test**: What speed maintains level flight at α = 0°? + +With wing incidence of ~+1.5°, the wing is at α_wing = +1.5° when fuselage is level. 
+At this AoA, considering α₀ ≈ -1.2°: +``` +C_L = C_L_α × (α_wing - α₀) +C_L = 0.097 × (1.5 - (-1.2)) = 0.097 × 2.7° = 0.262 +``` + +**Speed for Level Flight at 0° Fuselage Pitch (α_wing = +1.5°)**: +```python +# L = W (level flight) +# q × S × C_L = W +# 0.5 × ρ × V² × S × C_L = W +# V = sqrt(2W / (ρ × S × C_L)) + +W = 9000 # lb +rho = 0.002377 # slug/ft³ at sea level +S = 233 # ft² +C_L = 0.262 + +V = math.sqrt(2 * W / (rho * S * C_L)) +# V = sqrt(18000 / 0.145) = sqrt(124138) = 352 ft/s = 240 mph +``` + +**At true 0° wing AoA** (fuselage pitched down 1.5°): +``` +C_L = 0.097 × (0 - (-1.2)) = 0.097 × 1.2° = 0.116 + +V = sqrt(2 × 9000 / (0.002377 × 233 × 0.116)) +V = sqrt(18000 / 0.064) = sqrt(281250) = 530 ft/s = 361 mph +``` + +**Summary Table - Level Flight AoA vs Speed**: + +| Fuselage Pitch | Wing α | C_L | Speed (mph) | Speed (ft/s) | +|----------------|--------|-----|-------------|--------------| +| -1.5° | 0° | 0.116 | 361 | 530 | +| 0° | 1.5° | 0.262 | 240 | 352 | +| +2° | 3.5° | 0.456 | 182 | 267 | +| +5° | 6.5° | 0.747 | 142 | 208 | +| +10° | 11.5° | 1.233 | 111 | 163 | +| +15° | 16.5° (near stall) | 1.48 | 101 | 148 | + +--- + +## 6. FLIGHT ENVELOPE LIMITS + +### 6.1 Speed Limits + +| Limit | Speed | Notes | +|-------|-------|-------| +| V_NE (never exceed) | 505 mph IAS | 812 km/h | +| V_max dive | 525-550 mph | Pilots reported exceeding redline | +| M_crit | 0.75-0.80 | Onset of compressibility | +| Max Mach achieved | ~0.85 | With structural risk | + +### 6.2 G-Limits + +| Condition | Limit | Notes | +|-----------|-------|-------| +| Design limit (positive) | +8g | At 8,000 lb | +| Design limit (negative) | -4g | | +| With external stores | +6.5g | Reduced for safety | +| Ultimate load | +12g | Structural failure | + +### 6.3 Altitude Limits + +| Limit | Value | Notes | +|-------|-------|-------| +| Service ceiling | 41,900 ft | 100 ft/min ROC | +| Absolute ceiling | ~44,000 ft | | +| Combat ceiling | 36,900 ft | At 3000 RPM | + +--- + +## 7. 
IMPLEMENTATION CONSTANTS + +Copy-paste ready Python constants: + +```python +# ============================================ +# P-51D MUSTANG SIMULATION CONSTANTS +# Reference Weight: 9000 lb (combat weight) +# ============================================ + +# Physical Dimensions +WINGSPAN_FT = 37.0 +WINGSPAN_M = 11.28 +WING_AREA_FT2 = 233.0 +WING_AREA_M2 = 21.65 +MAC_FT = 6.63 +ASPECT_RATIO = 5.86 + +# Mass Properties (test condition) +WEIGHT_LB = 9000.0 +WEIGHT_KG = 4082.0 +MASS_SLUG = WEIGHT_LB / 32.174 # 279.8 slug +MASS_KG = WEIGHT_KG + +# Wing Geometry +WING_INCIDENCE_DEG = 1.5 # Root chord to fuselage +WING_INCIDENCE_RAD = 0.0262 + +# Aerodynamic Coefficients +CL_ALPHA_PER_DEG = 0.097 # 3D wing lift curve slope +CL_ALPHA_PER_RAD = 5.56 +ALPHA_ZERO_LIFT_DEG = -1.2 # Zero-lift angle (cambered airfoil) +ALPHA_ZERO_LIFT_RAD = -0.021 +CL_MAX_CLEAN = 1.48 +CL_MAX_FLAPS = 1.85 +CD0 = 0.0163 # Zero-lift drag coefficient +OSWALD_E = 0.75 # Oswald efficiency factor +K_INDUCED = 1.0 / (3.14159 * ASPECT_RATIO * OSWALD_E) # 0.072 + +# Stall +ALPHA_STALL_CLEAN_DEG = 19.1 +ALPHA_STALL_FLAPS_DEG = 16.3 + +# Propulsion +ENGINE_POWER_WEP_HP = 1650 # 67" Hg, 3000 RPM +ENGINE_POWER_MIL_HP = 1490 # 61" Hg, 3000 RPM +ENGINE_POWER_CRUISE_HP = 1100 # 46" Hg, 2700 RPM +PROP_DIAMETER_FT = 11.167 # 11 ft 2 in +PROP_EFFICIENCY_CRUISE = 0.82 +PROP_EFFICIENCY_CLIMB = 0.78 +PROP_EFFICIENCY_STATIC = 0.55 + +# Limits +VNE_MPH = 505 +VNE_FPS = 740 +G_LIMIT_POS = 8.0 +G_LIMIT_NEG = -4.0 +SERVICE_CEILING_FT = 41900 + +# Atmosphere (sea level ISA) +RHO_SL_SLUGFT3 = 0.002377 +RHO_SL_KGM3 = 1.225 +TEMP_SL_K = 288.15 +PRESSURE_SL_PA = 101325 +LAPSE_RATE_K_PER_M = 0.0065 + +# Unit Conversions +FPS_TO_MPH = 0.6818 +MPH_TO_FPS = 1.467 +FPS_TO_KTS = 0.5925 +KTS_TO_FPS = 1.688 +FT_TO_M = 0.3048 +M_TO_FT = 3.281 +HP_TO_WATTS = 745.7 +LBF_TO_N = 4.448 +``` + +--- + +## 8. 
VALIDATION TEST SUITE + +### 8.1 Test Categories + +| Test | Priority | Tolerance | Notes | +|------|----------|-----------|-------| +| Stall speed | HIGH | ±5 mph | Fundamental lift validation | +| Max speed (SL) | HIGH | ±10 mph | Drag + power validation | +| Max speed (altitude) | MEDIUM | ±15 mph | Supercharger model | +| ROC (SL) | HIGH | ±200 ft/min | Excess power validation | +| ROC (altitude) | MEDIUM | ±300 ft/min | | +| Level turn time | MEDIUM | ±2 sec | Turn physics | +| Level flight speed @ α=0 | HIGH | ±15 mph | Incidence angle validation | + +### 8.2 Example Test Script + +```python +import numpy as np + +class P51DValidationSuite: + """Validation tests for P-51D flight model""" + + def __init__(self, sim_env): + self.sim = sim_env + self.results = {} + + def test_stall_speed(self): + """Test: Gradually reduce speed until stall""" + # Initialize at safe speed, level flight + self.sim.reset(altitude_ft=5000, speed_fps=250, gamma_deg=0) + self.sim.set_weight(9000) + + # Gradually reduce throttle while maintaining level + for throttle in np.linspace(1.0, 0.0, 100): + self.sim.set_throttle(throttle) + + # Run for 5 seconds to stabilize + for _ in range(250): # 50 Hz × 5 sec + # Level flight controller + gamma = self.sim.get_gamma() + self.sim.command_pitch_rate(-gamma * 2.0) + self.sim.step() + + # Check for stall + if self.sim.get_alpha() > 18 or self.sim.is_stalled(): + stall_speed_mph = self.sim.get_speed_fps() * 0.6818 + break + + expected = 100 # mph + actual = stall_speed_mph + passed = abs(actual - expected) < 5 + + self.results['stall_speed'] = { + 'expected': expected, + 'actual': actual, + 'passed': passed, + 'tolerance': 5 + } + return passed + + def test_max_speed_sea_level(self): + """Test: Maximum speed at sea level with WEP""" + self.sim.reset(altitude_ft=0, speed_fps=400, gamma_deg=0) + self.sim.set_weight(9000) + self.sim.set_throttle(1.0) # WEP + + # Accelerate until stable + prev_speed = 0 + for step in range(10000): + # Maintain level flight + gamma = self.sim.get_gamma() + self.sim.command_pitch_rate(-gamma * 2.0) + self.sim.step() + + # Check for stabilization + current_speed = self.sim.get_speed_fps() + if abs(current_speed - prev_speed) < 0.01: + break + prev_speed = current_speed + + max_speed_mph = self.sim.get_speed_fps() * 0.6818 + + expected = 368 # mph at WEP, SL + actual = max_speed_mph + passed = abs(actual - expected) < 10 + + self.results['max_speed_sl'] = { + 'expected': expected, + 'actual': actual, + 'passed': passed, + 'tolerance': 10 + } + return passed + + def test_climb_rate_sea_level(self): + """Test: Rate of climb at sea level with WEP""" + self.sim.reset(altitude_ft=1000, speed_fps=242, gamma_deg=10) # ~165 mph + self.sim.set_weight(9000) + self.sim.set_throttle(1.0) + + target_speed_fps = 242 # 165 mph Vy + + # Climb at constant airspeed + for step in range(5000): + V_error = target_speed_fps - self.sim.get_speed_fps() + self.sim.command_pitch_rate(-V_error * 0.05) + self.sim.step() + + roc_fpm = self.sim.get_vertical_speed_fps() * 60 + + expected = 3410 # ft/min at WEP, SL + actual = roc_fpm + passed = abs(actual - expected) < 200 + + self.results['climb_rate_sl'] = { + 'expected': expected, + 'actual': actual, + 'passed': passed, + 'tolerance': 200 + } + return passed + + def test_turn_time(self): + """Test: 360° turn time at 290 km/h (180 mph)""" + V_fps = 264 # 180 mph + self.sim.reset(altitude_ft=5000, speed_fps=V_fps, gamma_deg=0) + self.sim.set_weight(9000) + self.sim.set_throttle(1.0) + + # Bank to ~70° for ~3.5g turn + 
bank_target_rad = np.radians(70) + initial_heading = self.sim.get_heading() + + time_elapsed = 0 + while True: + # Maintain bank angle + bank_error = bank_target_rad - self.sim.get_bank() + self.sim.command_roll_rate(bank_error * 2.0) + + # Maintain altitude + # ... (pull back to compensate) + + self.sim.step() + time_elapsed += self.sim.dt + + # Check for 360° turn completion + heading_change = self.sim.get_heading() - initial_heading + if heading_change >= 2 * np.pi: + break + + if time_elapsed > 60: # Timeout + break + + expected = 20 # seconds + actual = time_elapsed + passed = abs(actual - expected) < 2 + + self.results['turn_time'] = { + 'expected': expected, + 'actual': actual, + 'passed': passed, + 'tolerance': 2 + } + return passed + + def run_all_tests(self): + """Run complete validation suite""" + tests = [ + ('Stall Speed', self.test_stall_speed), + ('Max Speed (SL)', self.test_max_speed_sea_level), + ('Climb Rate (SL)', self.test_climb_rate_sea_level), + ('Turn Time', self.test_turn_time), + ] + + print("=" * 60) + print("P-51D MUSTANG FLIGHT MODEL VALIDATION") + print("=" * 60) + + all_passed = True + for name, test_func in tests: + try: + passed = test_func() + result = self.results.get(name.lower().replace(' ', '_').replace('(', '').replace(')', ''), {}) + status = "✓ PASS" if passed else "✗ FAIL" + print(f"{name:20s}: {status}") + print(f" Expected: {result.get('expected', 'N/A')}") + print(f" Actual: {result.get('actual', 'N/A'):.1f}") + print(f" Tolerance: ±{result.get('tolerance', 'N/A')}") + all_passed = all_passed and passed + except Exception as e: + print(f"{name:20s}: ✗ ERROR - {e}") + all_passed = False + + print("=" * 60) + print(f"OVERALL: {'ALL TESTS PASSED' if all_passed else 'SOME TESTS FAILED'}") + print("=" * 60) + + return all_passed +``` + +--- + +## 9. QUICK REFERENCE CARD + +### For Simulator Implementation + +**Level Flight Speed by AoA (9000 lb, sea level)**: +| Wing AoA | Speed | +|----------|-------| +| 0° | 361 mph | +| 2° | 263 mph | +| 5° | 166 mph | +| 10° | 117 mph | +| 15° | 105 mph (near stall) | + +**Key Performance Numbers (Military Power, 9000 lb)**: +- Max Speed (SL): 355 mph +- Max Speed (25,000 ft): 435 mph +- Stall Speed (clean): 100 mph +- ROC (SL): 3,000 ft/min +- Best Climb Speed: 165 mph IAS + +**Equations to Implement**: +``` +C_L = 5.56 × (α - (-0.021)) [rad] +C_D = 0.0163 + 0.072 × C_L² +L = 0.5 × ρ × V² × 233 × C_L [lb] +D = 0.5 × ρ × V² × 233 × C_D [lb] +T = η_p × (P × 550) / V [lb] +``` + +--- + +## 10. SOURCES AND REFERENCES + +1. **NAA Report NA-46-130**: Performance Calculations for P-51D Airplane + - Source: wwiiaircraftperformance.org + - Primary source for validated performance data + +2. **IL-2 Great Battles**: P-51D-15 Specifications + - Source: aergistal.github.io/il2/planes/p51d15.html + - Extensively validated against historical records + +3. **Virginia Tech Aerospace Archive**: P-51D Mustang Student Report + - Source: archive.aoe.vt.edu/mason/Mason_f/P51DMustang.pdf + - Aerodynamic data and wing geometry + +4. **Mid America Flight Museum**: P-51 Mustang Specifications + - Source: midamericaflightmuseum.com + - CD0, drag area, L/D data + +5. **WW2Aircraft.net Forums**: Technical discussions + - CL_max, wing incidence, airfoil data + - Model builder and pilot experiences + +6. **Packard V-1650 Merlin Wikipedia**: Engine specifications + - Power curves, supercharger data + +7. 
**NACA Airfoil Theory**: Thin airfoil theory for α₀ + - Zero-lift angle calculations + +--- + +## Document Version + +- **Version**: 1.0 +- **Date**: January 2026 +- **Purpose**: RL Environment Validation Reference +- **Author**: Claude (Anthropic) + Research + +--- + +*Note: This document is intended for simulation purposes. Actual aircraft performance varies with exact configuration, atmospheric conditions, and pilot technique. Tolerances are provided to account for these variations and simplifications inherent in the simulation model.* diff --git a/pufferlib/ocean/dogfight/PLAN.md b/pufferlib/ocean/dogfight/PLAN.md new file mode 100644 index 000000000..0907a6f53 --- /dev/null +++ b/pufferlib/ocean/dogfight/PLAN.md @@ -0,0 +1,161 @@ +# Dogfight Implementation Plan + +**Note: This plan is a living document and may be adjusted as development progresses.** + +First checkbox: initial implementation complete +Second checkbox: audited and verified + +--- + +## Phase 0: Scaffolding +- [x] [ ] 0.1 Create pufferlib/ocean/dogfight/ folder +- [x] [ ] 0.2 Create dogfight.h with basic Dogfight struct (observations, actions, rewards, terminals, Log) +- [x] [ ] 0.2b Define Log struct with ONLY float fields (env_binding.h iterates as floats): + episode_return, episode_length, score, kills, deaths, shots_fired, shots_hit, n +- [x] [ ] 0.3 Create binding.c: implement my_init() (unpack kwargs, call init()) and my_log() (map Log fields to dict) +- [x] [ ] 0.4 Create dogfight.py following drone_race.py pattern: + - Box(5) continuous actions [-1, 1] + - self.actions = self.actions.astype(np.float32) # REQUIRED for continuous +- [x] [ ] 0.5 Create pufferlib/config/ocean/dogfight.ini: + [base] package=ocean, env_name=puffer_dogfight + [vec] num_envs=8 + [env] num_envs=128, max_steps=3000 + [train] hyperparameters +- [x] [ ] 0.6 Verify setup.py compiles binding (already configured at line 192) +- [x] [ ] 0.7 Verify env can be instantiated: `from pufferlib.ocean.dogfight.dogfight import Dogfight` + +## Phase 1: Minimal Viable Environment +- [x] [ ] 1.1 Implement init(), c_close() → test_init() +- [x] [ ] 1.2 Define Plane struct (pos, vel, ori quat) → test_reset_plane() +- [x] [ ] 1.3 Implement c_reset(): spawn plane random pos → test_c_reset() +- [x] [ ] 1.4 Implement c_step(): plane moves forward → test_c_step_moves_forward() +- [x] [ ] 1.5 Implement compute_observations(): pos/vel/ori/up normalized → test_compute_observations() +- [x] [ ] 1.6 Wire up actions array (float* for 5 floats) - read but ignore for now +- [x] [ ] 1.7 Episode terminates: OOB → test_oob_terminates(), max_steps → test_max_steps_terminates() +- [x] [ ] 1.8 Python integration: env.reset() and env.step() work + +## Phase 2: Target Plane (Scripted Opponent) +- [x] [ ] 2.1 Add second Plane struct for opponent → test_opponent_spawns() +- [x] [ ] 2.2 Opponent spawns ahead of player, flies straight → test_opponent_spawns() +- [x] [ ] 2.3 Add relative position to opponent in observations → test_relative_observations() +- [x] [ ] 2.4 Add relative velocity to opponent in observations → test_relative_observations() +- [x] [ ] 2.5 Define observation space size in Python: OBS_SIZE=19 +- [x] [ ] 2.6 Basic reward: negative distance to opponent → test_pursuit_reward() +- [x] [ ] 2.7 Python integration: obs shape (19,), negative reward working + +## Phase 3: Flight Physics (Controls + Aerodynamics merged - correct order) +- [x] [ ] 3.1 Add aircraft parameters → test_aircraft_params() +- [x] [ ] 3.2 Quaternion orientation → done in Phase 1 +- [x] [ ] 3.3 
Map throttle action [0] to engine power → test_throttle_accelerates() +- [x] [ ] 3.4 Map elevator action [1] to pitch rate → test_controls_affect_orientation() +- [x] [ ] 3.5 Map ailerons action [2] to roll rate → test_controls_affect_orientation() +- [x] [ ] 3.6 Map rudder action [3] to yaw rate → step_plane_with_physics() +- [x] [ ] 3.7 Add rate limits → MAX_PITCH_RATE, MAX_ROLL_RATE, MAX_YAW_RATE +- [x] [ ] 3.8 Integrate orientation: q_dot = 0.5 * q * omega_quat → test_controls_affect_orientation() +- [x] [ ] 3.9 Compute angle of attack → step_plane_with_physics() +- [x] [ ] 3.10 Compute C_L clamped to C_L_max → test_stall_clamps_lift() +- [x] [ ] 3.11 Implement dynamic pressure → test_dynamic_pressure() +- [x] [ ] 3.12 Compute lift magnitude → test_lift_opposes_gravity() +- [x] [ ] 3.13 Compute drag magnitude → test_drag_slows_plane() +- [x] [ ] 3.14 Velocity-dependent propeller thrust → test_throttle_accelerates() +- [x] [ ] 3.15 Compute weight → test_plane_falls_without_lift() +- [x] [ ] 3.16 Transform forces to world frame → step_plane_with_physics() +- [x] [ ] 3.17 Sum forces → test_forces_sum_correctly() +- [x] [ ] 3.18 Integrate: a = F/m, v += a*dt, pos += v*dt → test_integration_updates_state() +- [x] [ ] 3.19 Enforce C_L ≤ C_L_max (stall) → test_stall_clamps_lift() +- [x] [ ] 3.20 Enforce n ≤ 8 (g-limit) → test_glimit_clamps_acceleration() +- [x] [ ] 3.21 Test: all Phase 3 tests pass (23 total) + +## Phase 3.5: Reward Shaping +Current pursuit reward (-dist/10000 per step) is too weak for effective learning. + +- [x] [ ] 3.5.1 Add closing velocity reward: +bonus when distance decreasing → test_closing_velocity_reward() +- [x] [ ] 3.5.2 Add tail position reward: +bonus when behind opponent (angle from opponent's forward) → test_tail_position_reward() +- [x] [ ] 3.5.3 Add altitude maintenance: small penalty for z < 200m or z > 2500m → test_altitude_penalty() +- [x] [ ] 3.5.4 Add speed maintenance: small penalty for V < 50 m/s (stall risk) → test_speed_penalty() +- [x] [ ] 3.5.5 Scale rewards appropriately (total episode reward ~10-100 for good policy) +- [x] [ ] 3.5.6 Test: training shows faster convergence with new rewards (2/3 runs positive) + +## Phase 4: Rendering +**Moved before Combat** - Can't debug combat without seeing planes. + +**Implementation Guide**: See `RENDERING.md` for code patterns, templates, and Raylib reference. 
+ +Camera and visibility: +- [x] [ ] 4.1 Fix camera: chase cam behind player, ~80m back → test_chase_camera_behind_player() +- [x] [ ] 4.2 Camera follows player position and orientation → test_chase_camera_behind_player() +- [x] [ ] 4.3 Add mouse controls for camera orbit (like drone_race) → test_camera_orbit_updates() + +Drawing planes: +- [x] [ ] 4.4 Draw player plane: green sphere + forward line → dogfight.h:469-478 +- [x] [ ] 4.5 Draw opponent plane: red sphere + forward line → dogfight.h:480-490 +- [ ] [ ] 4.6 Draw velocity vectors for debugging (optional, toggle with key) + +Environment: +- [x] [ ] 4.7 Draw ground plane at z=0 → dogfight.h:462-463 +- [ ] [ ] 4.8 Draw sky gradient or horizon reference +- [x] [ ] 4.9 Draw world bounds (wireframe box) → dogfight.h:465-467 + +HUD: +- [x] [ ] 4.10 Display: speed (m/s), altitude (m), throttle (%) → dogfight.h:498-500 +- [x] [ ] 4.11 Display: distance to opponent, episode tick → dogfight.h:501-502 +- [x] [ ] 4.12 Display: episode return → dogfight.h:503 + +## Phase 5: Combat Mechanics +**Struct additions:** +- Add to Plane: `int fire_cooldown`, `bool alive` (or `float health`) + +**Constants:** +- `GUN_RANGE` = 500.0f (meters) +- `GUN_CONE_ANGLE` = 0.087f (5 degrees in radians) +- `FIRE_COOLDOWN` = 10 (ticks = 0.2 seconds) + +**Implementation:** +- [x] [ ] 5.1 Add fire_cooldown field to Plane struct → dogfight.h:96 +- [x] [ ] 5.2 Add combat constants → dogfight.h:35-38, test_combat_constants() +- [x] [ ] 5.3 Map trigger action [4] to fire → test_trigger_fires(), test_fire_cooldown() +- [x] [ ] 5.4 Implement cone check hit detection → test_cone_hit_detection() +- [x] [ ] 5.5 Track shots_fired in Log when trigger pulled → test_trigger_fires() +- [x] [ ] 5.6 Track shots_hit in Log when hit detected → test_hit_reward() +- [x] [ ] 5.7 Reward for hit: +1.0 → test_hit_reward() +- [x] [ ] 5.8 On kill: respawn opponent, +10.0 reward → test_kill_respawns_opponent() +- [x] [ ] 5.9 Episode does NOT terminate on kill → test_kill_respawns_opponent() +- [x] [ ] 5.10 All combat tests pass (6 tests) → 36 total tests PASS + +## Phase 6: Opponent AI +**Physics fix:** Both planes must use same physics model. 
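+
+A minimal sketch of the intended per-step opponent update (items 6.1-6.3 below; the `compute_opponent_ai()` signature is assumed, and it fills the same Box(5) layout as the player: throttle, elevator, ailerons, rudder, trigger):
+
+```c
+float opponent_actions[5] = {0};
+compute_opponent_ai(env, opponent_actions);                     // pursuit / lead / evasion (6.4-6.8)
+step_plane_with_physics(&env->opponent, opponent_actions, DT);  // same physics as the player (6.2)
+```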
+ +- [ ] [ ] 6.1 Add `float opponent_actions[5]` array (computed by AI each step) +- [ ] [ ] 6.2 Call `step_plane_with_physics(&env->opponent, opponent_actions, DT)` instead of `step_plane()` +- [ ] [ ] 6.3 Remove old `step_plane()` function (no longer needed) + +**AI behaviors (compute_opponent_ai function):** +- [ ] [ ] 6.4 Pure pursuit: turn toward player → test_opponent_pursues() +- [ ] [ ] 6.5 Lead pursuit: aim ahead of player based on closure rate +- [ ] [ ] 6.6 Fire when player in gun cone → test_opponent_fires() +- [ ] [ ] 6.7 Throttle management: speed up when far, maintain when close +- [ ] [ ] 6.8 Basic evasion: break turn when player behind + +**Difficulty scaling:** +- [ ] [ ] 6.9 Add `float ai_skill` parameter (0.0 = random, 1.0 = perfect) +- [ ] [ ] 6.10 Scale AI accuracy/reaction time with skill level +- [ ] [ ] 6.11 Test: opponent provides meaningful challenge at skill=0.5 + +## Phase 7: Tuning & Polish +- [ ] [ ] 7.1 Tune aircraft parameters to match WW2 fighter specs: + - Max level speed: ~180-200 m/s (400-450 mph) + - Climb rate: ~15-20 m/s (3000-4000 ft/min) + - Sustained turn: 4-5g at combat speed + - Corner velocity: ~130-150 m/s (260-300 knots) +- [ ] [ ] 7.2 Verify add_log() populates all fields (score, kills, deaths, shots) +- [ ] [ ] 7.3 Performance profiling +- [ ] [ ] 7.4 Optimize hot paths for 1M+ steps/sec +- [ ] [ ] 7.5 Verify no memory leaks or allocations per step + +## Phase 8: Validation & Audit +- [ ] [ ] 8.1 Full test suite passes +- [ ] [ ] 8.2 Performance benchmark: confirm 1M+ steps/sec +- [ ] [ ] 8.3 Flight model validation: verify corner velocity, sustained turn rate, stall behavior +- [ ] [ ] 8.4 Training run: agent learns to pursue and shoot +- [ ] [ ] 8.5 Code review: all phases second checkbox +- [ ] [ ] 8.6 Documentation complete diff --git a/pufferlib/ocean/dogfight/RENDERING.md b/pufferlib/ocean/dogfight/RENDERING.md new file mode 100644 index 000000000..5a951bf23 --- /dev/null +++ b/pufferlib/ocean/dogfight/RENDERING.md @@ -0,0 +1,208 @@ +# Dogfight Rendering Guide + +Reference patterns extracted from PufferLib ocean environments for Phase 4 implementation. 
+ +## Current State +- `dogfight.h` lines 375-406: basic `c_render()` skeleton with placeholder camera +- Need: chase camera, plane drawing, ground, bounds, HUD + +## Client Struct + +Update the existing Client struct (~line 104) to support camera controls: + +```c +typedef struct Client { + Camera3D camera; + float width; + float height; + // Camera orbit state (for mouse control) + float cam_distance; + float cam_azimuth; + float cam_elevation; + bool is_dragging; + float last_mouse_x, last_mouse_y; +} Client; +``` + +## Chase Camera + +Calculate camera position behind and above player using quaternion orientation: + +```c +// Get player forward vector from quaternion +Vec3 fwd = quat_rotate(player->ori, vec3(1, 0, 0)); + +// Camera position: behind and above player +float dist = 80.0f, height = 30.0f; +Vector3 cam_pos = { + player->pos.x - fwd.x * dist, + player->pos.y - fwd.y * dist, + player->pos.z + height +}; + +// Look at player +Vector3 cam_target = {player->pos.x, player->pos.y, player->pos.z}; +``` + +## Raylib Quick Reference + +| Task | Code | +|------|------| +| Init window | `InitWindow(1280, 720, "Dogfight"); SetTargetFPS(60);` | +| Camera setup | `camera.up = (Vector3){0, 0, 1}; camera.fovy = 45; camera.projection = CAMERA_PERSPECTIVE;` | +| Draw sphere | `DrawSphere((Vector3){x, y, z}, radius, color);` | +| Draw line | `DrawLine3D(start, end, color);` | +| Draw ground | `DrawPlane((Vector3){0, 0, 0}, (Vector2){4000, 4000}, DARKGREEN);` | +| Draw bounds | `DrawCubeWires((Vector3){0, 0, 1500}, 4000, 4000, 3000, WHITE);` | +| HUD text | `DrawText(TextFormat("Speed: %.0f", speed), 10, 10, 20, WHITE);` | + +## Mouse Orbit Controls + +Pattern from `drone_race.h` - allows user to orbit camera with mouse drag: + +```c +void handle_camera_controls(Client *c) { + Vector2 mouse = GetMousePosition(); + + if (IsMouseButtonPressed(MOUSE_LEFT)) { + c->is_dragging = true; + c->last_mouse_x = mouse.x; + c->last_mouse_y = mouse.y; + } + if (IsMouseButtonReleased(MOUSE_LEFT)) { + c->is_dragging = false; + } + + if (c->is_dragging) { + float sensitivity = 0.005f; + c->cam_azimuth -= (mouse.x - c->last_mouse_x) * sensitivity; + c->cam_elevation += (mouse.y - c->last_mouse_y) * sensitivity; + c->cam_elevation = clampf(c->cam_elevation, -1.4f, 1.4f); // prevent gimbal lock + c->last_mouse_x = mouse.x; + c->last_mouse_y = mouse.y; + } + + // Mouse wheel zoom + float wheel = GetMouseWheelMove(); + if (wheel != 0) { + c->cam_distance = clampf(c->cam_distance - wheel * 5.0f, 30.0f, 200.0f); + } +} +``` + +## Complete c_render() Template + +```c +void c_render(Dogfight *env) { + // 1. Lazy init + if (env->client == NULL) { + env->client = (Client *)calloc(1, sizeof(Client)); + env->client->width = 1280; + env->client->height = 720; + env->client->cam_distance = 80.0f; + env->client->cam_azimuth = 0.0f; + env->client->cam_elevation = 0.3f; + + InitWindow(1280, 720, "Dogfight"); + SetTargetFPS(60); + + env->client->camera.up = (Vector3){0.0f, 0.0f, 1.0f}; + env->client->camera.fovy = 45.0f; + env->client->camera.projection = CAMERA_PERSPECTIVE; + } + + // 2. Handle window close + if (WindowShouldClose() || IsKeyDown(KEY_ESCAPE)) { + c_close(env); + exit(0); + } + + // 3. 
Update chase camera + Plane *p = &env->player; + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + float dist = env->client->cam_distance; + + env->client->camera.position = (Vector3){ + p->pos.x - fwd.x * dist, + p->pos.y - fwd.y * dist, + p->pos.z + dist * 0.4f + }; + env->client->camera.target = (Vector3){p->pos.x, p->pos.y, p->pos.z}; + + // 4. Optional: handle mouse orbit + // handle_camera_controls(env->client); + + // 5. Draw + BeginDrawing(); + ClearBackground((Color){6, 24, 24, 255}); + + BeginMode3D(env->client->camera); + + // Ground plane at z=0 + DrawPlane((Vector3){0, 0, 0}, (Vector2){4000, 4000}, (Color){20, 60, 20, 255}); + + // World bounds wireframe + DrawCubeWires((Vector3){0, 0, 1500}, 4000, 4000, 3000, (Color){100, 100, 100, 255}); + + // Player plane (green) + Vector3 player_pos = {p->pos.x, p->pos.y, p->pos.z}; + DrawSphere(player_pos, 5.0f, GREEN); + // Forward direction indicator + Vector3 player_fwd = {p->pos.x + fwd.x * 30, p->pos.y + fwd.y * 30, p->pos.z + fwd.z * 30}; + DrawLine3D(player_pos, player_fwd, GREEN); + + // Opponent plane (red) + Plane *o = &env->opponent; + Vector3 opp_pos = {o->pos.x, o->pos.y, o->pos.z}; + DrawSphere(opp_pos, 5.0f, RED); + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vector3 opp_fwd_end = {o->pos.x + opp_fwd.x * 30, o->pos.y + opp_fwd.y * 30, o->pos.z + opp_fwd.z * 30}; + DrawLine3D(opp_pos, opp_fwd_end, RED); + + EndMode3D(); + + // HUD + float speed = norm3(p->vel); + float dist_to_opp = norm3(sub3(o->pos, p->pos)); + + DrawText(TextFormat("Speed: %.0f m/s", speed), 10, 10, 20, WHITE); + DrawText(TextFormat("Alt: %.0f m", p->pos.z), 10, 40, 20, WHITE); + DrawText(TextFormat("Throttle: %.0f%%", p->throttle * 100.0f), 10, 70, 20, WHITE); + DrawText(TextFormat("Distance: %.0f m", dist_to_opp), 10, 100, 20, WHITE); + DrawText(TextFormat("Tick: %d", env->tick), 10, 130, 20, WHITE); + DrawText(TextFormat("Return: %.2f", env->episode_return), 10, 160, 20, WHITE); + + // Camera controls hint + DrawText("ESC: Exit", 10, env->client->height - 30, 16, GRAY); + + EndDrawing(); +} +``` + +## Coordinate System + +- Dogfight uses: **X=forward, Y=right, Z=up** +- Set `camera.up = {0, 0, 1}` to match +- World bounds: ±2000 X/Y, 0-3000 Z (from `dogfight.h` defines) + +## Reference Environments + +| File | Key Patterns | +|------|--------------| +| `drone_race/drone_race.h` | Spherical orbit camera, mouse controls, trail effects | +| `drive/drive.h` | FPV chase camera using heading angle | +| `battle/battle.h` | Quaternion orientation, `CAMERA_THIRD_PERSON`, 3D models | +| `impulse_wars/render.h` | Smooth camera lerp, bloom effects, sophisticated UI | + +## Build & Test + +```bash +# Build +python setup.py build_ext --inplace --force + +# Run with rendering +python -m pufferlib.pufferl train puffer_dogfight --render + +# Run tests +gcc -I raylib-5.5_linux_amd64/include -o pufferlib/ocean/dogfight/dogfight_test pufferlib/ocean/dogfight/dogfight_test.c raylib-5.5_linux_amd64/lib/libraylib.a -lm -lpthread -ldl && ./pufferlib/ocean/dogfight/dogfight_test +``` diff --git a/pufferlib/ocean/dogfight/SPEC.md b/pufferlib/ocean/dogfight/SPEC.md new file mode 100644 index 000000000..93bdebec8 --- /dev/null +++ b/pufferlib/ocean/dogfight/SPEC.md @@ -0,0 +1,51 @@ +Objective: Implement a high-performance simulation of world war 2 dogfighting as an RL environment into PufferLib. + +Requirements: +1. 1M+ steps per second simulation on a single CPU core. 
This is easily attainable by following the practice of other environments in PufferLib - no memory allocations after initialization, pass all structs by reference +2. Single-file C implementation header-only. A small, separate .c file will be used for testing. +3. Match PufferLib C API for RL environments +4. TDD Test Driven Development + +Environment details: +- Must model: managing throttle, aileron, rudder, elevator, and trigger to win dogfights in a real physics simulator with real approximation of Drag Polar, aerodynamic stall, etc +- Action space: Box(5) continuous [-1, 1]: throttle, elevator, ailerons, rudder, trigger (fire if > 0.5) +- Optional future: flaps +- Not modeling: full CFD, air turbulence, structural damage + +- Reasonably accurate physics, thrust, lift, drag, kinetic and potential energy, conservation of momentum, conservation of energy, lift changing with angle of attack, etc +- Try to approximately match the performance of real world war 2 aircraft +- Agents to learn to manage energy and air combat maneuvers to win dogfights + +Physics (3DOF point-mass, metric units): +- ρ = 1.225 kg/m³ (fixed sea level) +- q = 0.5 * ρ * V² (dynamic pressure, Pa) +- L = C_L * q * S (lift, N) +- D = (C_D0 + K * C_L²) * q * S (drag, N) +- T = min(P·η/V, 0.3·P) where P = ENGINE_POWER × throttle, η = 0.80 (propeller thrust, N) +- W = m * g (weight, N) + +Constraints: +- C_L ≤ 1.4 (stall) +- n = L/W ≤ 8 (structural g-limit) + +Approximations (valid for WW2, Mach < 0.6): +- Incompressible flow, flat earth, ignore prop torque/weather + +Instructions: +- Read pufferlib/ocean/[target, snake] for simple examples of API compatibility and code standards +- Read pufferlib/ocean/nmmo3/nmmo3.h for a much more complex environment with the same game tick system as the desired Olm environment +- The implementation will live in pufferlib/ocean/dogfight with dogfight.h being the source and dogfight.c being a tiny main file. +- Build with: python setup.py build_ext --inplace --force +- Use pufferlib/ocean/drone_race/ as a template +- Opponent will be generated programatically, very simple at first like just flying straight, adding maneuvers later + +References: +Links in CLAUDE.md +PufferAI docs: https://puffer.ai/docs.html +Reference environments: pufferlib/ocean. Source code is in .h files. Ignore .pyx. The .py files only contain bindings. + +Code style and optimization: +- Use the environments "squared," "target," and "template" as API references. You must implement c_step, c_reset, and c_render +- The only dependency is raylib, which is for rendering only +- Match the code style of "snake" and "nmmo3" closely: procedural C with minimal abstraction, functions mainly split out to avoid duplicating code. +- No memory allocations after initialization. Pass all structs by reference. diff --git a/pufferlib/ocean/dogfight/TRAINING_IMPROVEMENTS.md b/pufferlib/ocean/dogfight/TRAINING_IMPROVEMENTS.md new file mode 100644 index 000000000..3c410a70b --- /dev/null +++ b/pufferlib/ocean/dogfight/TRAINING_IMPROVEMENTS.md @@ -0,0 +1,274 @@ +# Dogfight Training Improvements + +Analysis and recommendations for improving agent training before implementing opponent AI. 
+ +**Date**: 2026-01-13 +**Current Performance**: Phase 5 baseline - +23.44 mean return, 0.19 kills/episode, ~1.6% accuracy + +--- + +## Problem Analysis + +### Current Target Behavior + +From `step_plane()` (dogfight.h:317-324): +```c +void step_plane(Plane *p, float dt) { + Vec3 forward = quat_rotate(p->ori, vec3(1, 0, 0)); + float speed = norm3(p->vel); + if (speed < 1.0f) speed = 80.0f; + p->vel = mul3(forward, speed); + p->pos = add3(p->pos, mul3(p->vel, dt)); +} +``` + +From `respawn_opponent()` (dogfight.h:340-352): +```c +Vec3 vel = vec3(80, 0, 0); // Always flies +X direction +reset_plane(&env->opponent, opp_pos, vel); +``` + +**Result**: Target flies straight at 80 m/s, always in +X direction, forever. Never turns, never changes altitude. Orientation quaternion stays at identity. + +### Why Training Is Hard + +| Issue | Impact | +|-------|--------| +| Target always flies +X | If player spawns heading different direction, target flies away | +| No orientation variation | Target always faces +X regardless of spawn position | +| Constant speed = easy overshoot | Player accelerates to catch up, overshoots, target keeps going | +| 5° gun cone at 200m = ~17m radius | Very precise aiming required | +| No aiming reward | Agent gets no feedback until actual hit | + +### Current Results Breakdown + +- **0.19 kills/episode** - less than 1 kill per 5 episodes +- **~12 shots/episode** - agent learned to fire +- **1.6% accuracy** - agent did NOT learn to aim + +The agent learned pursuit and firing, but not aiming. + +--- + +## Improvement Recommendations + +### Priority 1: Fix Target Spawn Direction + +**Problem**: Target always flies +X regardless of where player is facing. + +**Solution A - Match player direction**: +```c +void respawn_opponent(Dogfight *env) { + Plane *p = &env->player; + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + + Vec3 opp_pos = vec3( + p->pos.x + fwd.x * rndf(300, 600) + rndf(-100, 100), + p->pos.y + fwd.y * rndf(300, 600) + rndf(-100, 100), + clampf(p->pos.z + rndf(-100, 100), 200, 2500) + ); + + // KEY CHANGE: Opponent flies same direction as player + Vec3 vel = mul3(fwd, 80.0f); + reset_plane(&env->opponent, opp_pos, vel); + env->opponent.ori = p->ori; // Match orientation too +} +``` + +### Priority 2: Add Aiming Reward + +**Problem**: No feedback on aim quality until actual hit. 
+
+**Solution**: Reward for having the target near the gun cone:
+```c
+// In c_step(), after pursuit rewards:
+Vec3 to_opp = sub3(o->pos, p->pos);
+float dist = norm3(to_opp);
+Vec3 to_opp_norm = normalize3(to_opp);
+Vec3 player_fwd = quat_rotate(p->ori, vec3(1, 0, 0));
+float aim_dot = dot3(to_opp_norm, player_fwd); // 1.0 = perfect aim
+
+// Reward for tracking (within 2x gun cone and in range)
+if (aim_dot > cosf(GUN_CONE_ANGLE * 2) && dist < GUN_RANGE) {
+    reward += 0.05f; // Small continuous reward for good tracking
+}
+
+// Bonus for very close aim (within gun cone but didn't fire)
+if (aim_dot > cosf(GUN_CONE_ANGLE) && dist < GUN_RANGE) {
+    reward += 0.1f; // Stronger reward for firing solution
+}
+```
+
+### Priority 3: Wider Gun Cone (Training Wheels)
+
+**Current**: 5° (0.087 rad) - realistic but hard
+**Proposed**: Start with 10-15° for initial training
+
+```c
+// Option: Make gun cone a parameter instead of constant
+// In Dogfight struct:
+float gun_cone_angle; // Set via config
+
+// Or just widen temporarily:
+#define GUN_CONE_ANGLE 0.175f // ~10 degrees
+```
+
+### Priority 4: Target Behavior Modes
+
+Add different target behaviors for curriculum:
+
+```c
+typedef enum {
+    TARGET_STRAIGHT = 0, // Current: flies straight
+    TARGET_CIRCLE = 1,   // Constant gentle turn
+    TARGET_WEAVE = 2,    // Sinusoidal lateral movement
+    TARGET_RANDOM = 3    // Occasional random turns
+} TargetMode;
+
+void step_plane(Plane *p, float dt, TargetMode mode) {
+    switch (mode) {
+        case TARGET_STRAIGHT:
+            // Current behavior: no turning
+            break;
+
+        case TARGET_CIRCLE: {
+            // Constant turn rate (scoped in braces so the declarations are legal C)
+            float turn_rate = 0.3f; // rad/s, ~17 deg/s
+            Quat turn = quat_from_axis_angle(vec3(0, 0, 1), turn_rate * dt);
+            p->ori = quat_mul(turn, p->ori);
+            quat_normalize(&p->ori);
+            break;
+        }
+
+        case TARGET_WEAVE: {
+            // Sinusoidal yaw (note: static state is shared by all planes; fine for a single scripted target)
+            static float phase = 0;
+            phase += dt;
+            float yaw_rate = 0.5f * sinf(phase * 0.5f);
+            Quat weave = quat_from_axis_angle(vec3(0, 0, 1), yaw_rate * dt);
+            p->ori = quat_mul(weave, p->ori);
+            quat_normalize(&p->ori);
+            break;
+        }
+
+        default: // TARGET_RANDOM not sketched here
+            break;
+    }
+
+    Vec3 forward = quat_rotate(p->ori, vec3(1, 0, 0));
+    p->vel = mul3(forward, 80.0f);
+    p->pos = add3(p->pos, mul3(p->vel, dt));
+}
+```
+
+### Priority 5: Better Observations for Aiming
+
+Current observations (19 total):
+- Player state: pos(3), vel(3), ori(4), up(3) = 13
+- Relative: pos(3), vel(3) = 6
+
+**Missing**: Direct aim information
+
+**Add**:
+```c
+// After existing observations:
+
+// Aim dot product (1.0 = perfect aim, -1.0 = facing away)
+Vec3 to_opp_norm = normalize3(sub3(o->pos, p->pos));
+Vec3 player_fwd = quat_rotate(p->ori, vec3(1, 0, 0));
+float aim_dot = dot3(to_opp_norm, player_fwd);
+env->observations[i++] = aim_dot;
+
+// Distance normalized by gun range (0-1 = in range, >1 = out of range)
+float dist = norm3(sub3(o->pos, p->pos));
+env->observations[i++] = dist / GUN_RANGE;
+
+// Update OBS_SIZE from 19 to 21
+```
+
+---
+
+## Curriculum Learning Plan
+
+| Level | Target Behavior | Speed | Gun Cone | Spawn Distance |
+|-------|----------------|-------|----------|----------------|
+| 1 | Stationary | 0 m/s | 15° | 200-300m |
+| 2 | Slow straight | 40 m/s | 12° | 200-400m |
+| 3 | Medium straight | 80 m/s | 10° | 300-500m |
+| 4 | Gentle circles | 80 m/s | 7° | 300-600m |
+| 5 | Variable | 80 m/s | 5° | 300-600m |
+
+---
+
+## Implementation Order
+
+1.
**Quick wins** (do first): + - [x] Fix opponent spawn to match player direction → **FAILED, REVERTED** (made things worse) + - [x] Add aiming reward → **SUCCESS** (+58% return, +89% kills, +125% accuracy) + - [x] Run benchmark to compare + +2. **If still struggling**: + - [ ] Widen gun cone temporarily + - [x] Add aim_dot and distance observations → **TESTED** in obs_scheme sweep (schemes 1-5 include aim helpers, but WORLD_FRAME scheme 0 still best - see OBSERVATION_EXPERIMENTS.md) + - [x] Run benchmark + +3. **For polish**: + - [x] Add target behavior modes → **DONE** autopilot system with 7 modes + - [x] Implement curriculum → **DONE** mode_weights for curriculum learning + +--- + +## Debug Tools + +### DEBUG flag in dogfight.h +Set `#define DEBUG 1` at the top of dogfight.h to enable verbose per-step logging: +- Actions (throttle, elevator, ailerons, rudder, trigger) +- Physics (speed, AoA, lift, drag, thrust, g-force) +- Target state (speed, position, direction) +- Reward breakdown (each component) +- Combat (aim angle, distance, in_cone, in_range) + +### Python sanity tests +Run `python pufferlib/ocean/dogfight/test_flight.py` to verify physics: +- Full throttle straight flight → ~150 m/s max +- Stall speed → ~50 m/s +- Climb rate → ~16 m/s +- Glide L/D → ~14.7 +- Turn tests → 30° and 60° bank with PID control + +--- + +## Test Ideas + +```c +void test_opponent_spawns_same_direction() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Player and opponent should be flying roughly same direction + Vec3 player_fwd = quat_rotate(env.player.ori, vec3(1, 0, 0)); + Vec3 opp_fwd = quat_rotate(env.opponent.ori, vec3(1, 0, 0)); + float alignment = dot3(player_fwd, opp_fwd); + + assert(alignment > 0.9f); // Should be nearly parallel +} + +void test_aiming_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place player aimed directly at opponent + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(200, 0, 500); // Directly ahead + + c_step(&env); + float reward_aimed = env.rewards[0]; + + // Place player aimed away + c_reset(&env); + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat_from_axis_angle(vec3(0, 0, 1), PI / 2); // 90° off + env.opponent.pos = vec3(200, 0, 500); + + c_step(&env); + float reward_not_aimed = env.rewards[0]; + + assert(reward_aimed > reward_not_aimed); +} +``` diff --git a/pufferlib/ocean/dogfight/autopilot.h b/pufferlib/ocean/dogfight/autopilot.h new file mode 100644 index 000000000..281b50fca --- /dev/null +++ b/pufferlib/ocean/dogfight/autopilot.h @@ -0,0 +1,346 @@ +/** + * autopilot.h - Target aircraft flight maneuvers + * + * Provides autopilot modes for opponent aircraft during training. + * Can be set randomly at reset or forced via API for curriculum learning. 
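+ *
+ * Rough usage sketch (illustrative only - the real call sites live in dogfight.h;
+ * the function names and signatures below are the ones defined in this header):
+ *
+ *   AutopilotState ap;
+ *   autopilot_init(&ap);                              // defaults: AP_STRAIGHT, full throttle
+ *   autopilot_set_mode(&ap, AP_TURN_LEFT, 1.0f, 30.0f, 5.0f);
+ *   float actions[5];
+ *   Plane *target = &env->opponent;                   // the scripted opponent (see dogfight.h)
+ *   autopilot_step(&ap, target, actions, 0.02f);      // writes [throttle, elevator, ailerons, rudder, trigger]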
+ */ + +#ifndef AUTOPILOT_H +#define AUTOPILOT_H + +#include "flightlib.h" +#include + +// Autopilot mode enumeration +typedef enum { + AP_STRAIGHT = 0, // Fly straight (current/default behavior) + AP_LEVEL, // Level flight with PD on vz + AP_TURN_LEFT, // Coordinated left turn + AP_TURN_RIGHT, // Coordinated right turn + AP_CLIMB, // Constant climb rate + AP_DESCEND, // Constant descent rate + AP_HARD_TURN_LEFT, // Aggressive 70° left turn + AP_HARD_TURN_RIGHT, // Aggressive 70° right turn + AP_WEAVE, // Sine wave jinking (S-turns) + AP_EVASIVE, // Break turn when threat behind + AP_RANDOM, // Random mode selection at reset + AP_COUNT +} AutopilotMode; + +// PID gains (from test_flight.py) +#define AP_LEVEL_KP 0.001f +#define AP_LEVEL_KD 0.001f +#define AP_TURN_ELEV_KP -0.05f +#define AP_TURN_ELEV_KD 0.005f +#define AP_TURN_ROLL_KP -2.0f +#define AP_TURN_ROLL_KD -0.1f + +// Default parameters +#define AP_DEFAULT_THROTTLE 1.0f +#define AP_DEFAULT_BANK_DEG 30.0f // Base gentle turns +#define AP_DEFAULT_CLIMB_RATE 5.0f + +// Stage-specific bank angles (curriculum progression) +#define AP_STAGE4_BANK_DEG 30.0f // MANEUVERING - gentle 30° turns +#define AP_STAGE5_BANK_DEG 45.0f // FULL_RANDOM - medium 45° turns +#define AP_STAGE6_BANK_DEG 60.0f // HARD_MANEUVERING - steep 60° turns +#define AP_HARD_BANK_DEG 70.0f // EVASIVE - aggressive 70° turns +#define AP_WEAVE_AMPLITUDE 0.6f // ~35° bank amplitude (radians) +#define AP_WEAVE_PERIOD 3.0f // 3 second full cycle + +// Autopilot state for a plane +typedef struct { + AutopilotMode mode; + int randomize_on_reset; // If true, pick random mode each reset + float throttle; // Target throttle [0,1] + float target_bank; // Target bank angle (radians) + float target_vz; // Target vertical velocity (m/s) + + // Curriculum: mode selection weights (sum to 1.0) + float mode_weights[AP_COUNT]; + + // Own RNG state (not affected by srand() calls) + unsigned int rng_state; + + // PID gains + float pitch_kp, pitch_kd; + float roll_kp, roll_kd; + + // PID state (for derivative terms) + float prev_vz; + float prev_bank_error; + + // AP_WEAVE state + float phase; // Sine wave phase for weave oscillation + + // AP_EVASIVE state (set by caller each step) + Vec3 threat_pos; // Position of threat to evade +} AutopilotState; + +// Simple LCG random for autopilot (not affected by srand) +static inline float ap_rand(AutopilotState* ap) { + ap->rng_state = ap->rng_state * 1103515245 + 12345; + return (float)((ap->rng_state >> 16) & 0x7FFF) / 32767.0f; +} + +// Initialize autopilot with defaults +static inline void autopilot_init(AutopilotState* ap) { + ap->mode = AP_STRAIGHT; + ap->randomize_on_reset = 0; + ap->throttle = AP_DEFAULT_THROTTLE; + ap->target_bank = AP_DEFAULT_BANK_DEG * (PI / 180.0f); + ap->target_vz = AP_DEFAULT_CLIMB_RATE; + + // Default: uniform weights for modes 1-5 (skip STRAIGHT and RANDOM) + for (int i = 0; i < AP_COUNT; i++) { + ap->mode_weights[i] = 0.0f; + } + float uniform = 1.0f / 5.0f; // 5 modes: LEVEL, TURN_L, TURN_R, CLIMB, DESCEND + ap->mode_weights[AP_LEVEL] = uniform; + ap->mode_weights[AP_TURN_LEFT] = uniform; + ap->mode_weights[AP_TURN_RIGHT] = uniform; + ap->mode_weights[AP_CLIMB] = uniform; + ap->mode_weights[AP_DESCEND] = uniform; + + // Seed autopilot RNG from system rand (called once at init, not affected by later srand) + ap->rng_state = (unsigned int)rand(); + + ap->pitch_kp = AP_LEVEL_KP; + ap->pitch_kd = AP_LEVEL_KD; + ap->roll_kp = AP_TURN_ROLL_KP; + ap->roll_kd = AP_TURN_ROLL_KD; + + ap->prev_vz = 0.0f; + ap->prev_bank_error 
= 0.0f; + + // New mode state + ap->phase = 0.0f; + ap->threat_pos = vec3(0, 0, 0); +} + +// Set autopilot mode with parameters +static inline void autopilot_set_mode(AutopilotState* ap, AutopilotMode mode, + float throttle, float bank_deg, float climb_rate) { + ap->mode = mode; + ap->randomize_on_reset = (mode == AP_RANDOM) ? 1 : 0; + ap->throttle = throttle; + ap->target_bank = bank_deg * (PI / 180.0f); + ap->target_vz = climb_rate; + + // Reset PID state on mode change + ap->prev_vz = 0.0f; + ap->prev_bank_error = 0.0f; + + // Set appropriate gains based on mode + if (mode == AP_LEVEL || mode == AP_CLIMB || mode == AP_DESCEND) { + ap->pitch_kp = AP_LEVEL_KP; + ap->pitch_kd = AP_LEVEL_KD; + } else if (mode == AP_TURN_LEFT || mode == AP_TURN_RIGHT) { + ap->pitch_kp = AP_TURN_ELEV_KP; + ap->pitch_kd = AP_TURN_ELEV_KD; + ap->roll_kp = AP_TURN_ROLL_KP; + ap->roll_kd = AP_TURN_ROLL_KD; + } +} + +// Randomize autopilot mode using weighted selection (for AP_RANDOM at reset) +static inline void autopilot_randomize(AutopilotState* ap) { + float r = ap_rand(ap); // Use own RNG, not affected by srand() + float cumsum = 0.0f; + AutopilotMode selected = AP_LEVEL; // Default fallback + + for (int i = 1; i < AP_COUNT - 1; i++) { // Skip STRAIGHT(0) and RANDOM(6) + cumsum += ap->mode_weights[i]; + if (r <= cumsum) { + selected = (AutopilotMode)i; + break; + } + } + + // Save randomize flag (autopilot_set_mode would clear it) + int save_randomize = ap->randomize_on_reset; + autopilot_set_mode(ap, selected, AP_DEFAULT_THROTTLE, + AP_DEFAULT_BANK_DEG, AP_DEFAULT_CLIMB_RATE); + ap->randomize_on_reset = save_randomize; +} + +// Get bank angle from plane orientation +// Returns positive for right bank, negative for left bank +static inline float ap_get_bank_angle(Plane* p) { + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float bank = acosf(fminf(fmaxf(up.z, -1.0f), 1.0f)); + if (up.y < 0) bank = -bank; + return bank; +} + +// Get vertical velocity from plane +static inline float ap_get_vz(Plane* p) { + return p->vel.z; +} + +// Clamp value to range +static inline float ap_clamp(float v, float lo, float hi) { + return fminf(fmaxf(v, lo), hi); +} + +// Main autopilot step function +// Computes actions[5] = [throttle, elevator, ailerons, rudder, trigger] +static inline void autopilot_step(AutopilotState* ap, Plane* p, float* actions, float dt) { + // Initialize all actions to zero + actions[0] = 0.0f; // throttle (will be set below) + actions[1] = 0.0f; // elevator + actions[2] = 0.0f; // ailerons + actions[3] = 0.0f; // rudder + actions[4] = -1.0f; // trigger (never fire) + + // Set throttle (convert from [0,1] to [-1,1] action space) + actions[0] = ap->throttle * 2.0f - 1.0f; + + float vz = ap_get_vz(p); + float bank = ap_get_bank_angle(p); + + switch (ap->mode) { + case AP_STRAIGHT: + // Do nothing - just fly straight with throttle + break; + + case AP_LEVEL: { + // PD control on vz to maintain level flight + float vz_error = -vz; // Target vz = 0 + float vz_deriv = (vz - ap->prev_vz) / dt; + float elevator = ap->pitch_kp * vz_error + ap->pitch_kd * vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + break; + } + + case AP_TURN_LEFT: + case AP_TURN_RIGHT: { + // Dual PID: roll to target bank, pitch to maintain altitude + float target_bank = ap->target_bank; + if (ap->mode == AP_TURN_LEFT) target_bank = -target_bank; + + // Elevator PID (maintain vz = 0) + float vz_error = -vz; + float vz_deriv = (vz - ap->prev_vz) / dt; + float elevator = ap->pitch_kp * vz_error + ap->pitch_kd * 
vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + + // Aileron PID (achieve target bank) + float bank_error = target_bank - bank; + float bank_deriv = (bank_error - ap->prev_bank_error) / dt; + float aileron = ap->roll_kp * bank_error + ap->roll_kd * bank_deriv; + actions[2] = ap_clamp(aileron, -1.0f, 1.0f); + ap->prev_bank_error = bank_error; + break; + } + + case AP_CLIMB: { + // PD control to maintain target climb rate + float vz_error = ap->target_vz - vz; + float vz_deriv = (vz - ap->prev_vz) / dt; + // Negative because nose-up pitch (negative elevator) increases climb + float elevator = -ap->pitch_kp * vz_error + ap->pitch_kd * vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + break; + } + + case AP_DESCEND: { + // PD control to maintain target descent rate + float vz_error = -ap->target_vz - vz; // Target is negative vz + float vz_deriv = (vz - ap->prev_vz) / dt; + float elevator = -ap->pitch_kp * vz_error + ap->pitch_kd * vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + break; + } + + case AP_HARD_TURN_LEFT: + case AP_HARD_TURN_RIGHT: { + // Aggressive turn with high bank angle (70°) + float target_bank = AP_HARD_BANK_DEG * (PI / 180.0f); + if (ap->mode == AP_HARD_TURN_LEFT) target_bank = -target_bank; + + // Hard pull to maintain altitude in steep bank + float vz_error = -vz; + float elevator = -0.5f + ap->pitch_kp * vz_error; // Base pull + PD + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + + // Aggressive aileron to achieve bank (50% more aggressive) + float bank_error = target_bank - bank; + float aileron = ap->roll_kp * bank_error * 1.5f; + actions[2] = ap_clamp(aileron, -1.0f, 1.0f); + break; + } + + case AP_WEAVE: { + // Sine wave banking - oscillates left/right, hard to lead + ap->phase += dt * (2.0f * PI / AP_WEAVE_PERIOD); + if (ap->phase > 2.0f * PI) ap->phase -= 2.0f * PI; + + float target_bank = AP_WEAVE_AMPLITUDE * sinf(ap->phase); + + // Elevator PID for level flight (maintain vz = 0) + float vz_error = -vz; + float vz_deriv = (vz - ap->prev_vz) / dt; + float elevator = ap->pitch_kp * vz_error + ap->pitch_kd * vz_deriv; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + + // Aileron PID to track oscillating bank + float bank_error = target_bank - bank; + float bank_deriv = (bank_error - ap->prev_bank_error) / dt; + float aileron = ap->roll_kp * bank_error + ap->roll_kd * bank_deriv; + actions[2] = ap_clamp(aileron, -1.0f, 1.0f); + ap->prev_bank_error = bank_error; + break; + } + + case AP_EVASIVE: { + // Break turn away from threat when close and behind + Vec3 to_threat = sub3(ap->threat_pos, p->pos); + float dist = norm3(to_threat); + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + float dot_fwd = dot3(normalize3(to_threat), fwd); + + float target_bank = 0.0f; + float base_elevator = 0.0f; + + // Check if threat is close (<600m) and not in front (behind or side) + if (dist < 600.0f && dot_fwd < 0.3f) { + // Threat close and behind - BREAK TURN! + // Determine which side threat is on + Vec3 right = quat_rotate(p->ori, vec3(0, -1, 0)); + float dot_right = dot3(normalize3(to_threat), right); + + // Turn away from threat (opposite side) + target_bank = (dot_right > 0) ? 
-1.2f : 1.2f; // ~70° break + base_elevator = -0.6f; // Pull hard + } + + // Elevator: base pull + PD for altitude + float vz_error = -vz; + float elevator = base_elevator + ap->pitch_kp * vz_error; + actions[1] = ap_clamp(elevator, -1.0f, 1.0f); + ap->prev_vz = vz; + + // Aileron to achieve break bank (aggressive) + float bank_error = target_bank - bank; + float aileron = ap->roll_kp * bank_error * 1.5f; + actions[2] = ap_clamp(aileron, -1.0f, 1.0f); + break; + } + + case AP_RANDOM: + // Should have been randomized at reset, fall through to straight + break; + + default: + break; + } +} + +#endif // AUTOPILOT_H diff --git a/pufferlib/ocean/dogfight/baselines/BASELINE_SUMMARY.md b/pufferlib/ocean/dogfight/baselines/BASELINE_SUMMARY.md new file mode 100644 index 000000000..e32c51af2 --- /dev/null +++ b/pufferlib/ocean/dogfight/baselines/BASELINE_SUMMARY.md @@ -0,0 +1,449 @@ +# Baseline Training Results + +Date: 2026-01-12 +Training: 100M steps each + +--- + +## Pre-Reward Shaping (baseline) +Reward: `-dist/10000` per step (pursuit only) + +| Run | Episode Return | Episode Length | +|-----|----------------|----------------| +| 1 | -31.78 | 1111 | +| 2 | -46.42 | 1247 | +| 3 | -73.12 | 1371 | +| **Mean** | **-50.44** | **1243** | + +Observations: +- High variance (-31 to -73) +- All returns negative +- Weak reward signal (~-0.03 per step) + +--- + +## Post-Reward Shaping (Phase 3.5) +Reward: base pursuit + closing velocity + tail position + altitude/speed penalties + +| Run | Episode Return | Episode Length | +|-----|----------------|----------------| +| 1 | -66.82 | 1140 | +| 2 | +5.32 | 1063 | +| 3 | +16.13 | 1050 | +| **Mean** | **-15.12** | **1084** | + +Observations: +- **2 of 3 runs achieved positive returns** (significant improvement) +- Shorter episodes (more decisive behavior) +- Still high variance (need more tuning) +- Closing velocity and tail position rewards working + +--- + +## Phase 5: Combat Mechanics +Reward: pursuit shaping + hit (+1.0) + kill (+10.0) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +29.54 | 1047 | 0.24 | 0.24/12.0 | +| 2 | +12.46 | 1081 | 0.16 | 0.16/11.8 | +| 3 | +28.31 | 1061 | 0.18 | 0.18/11.7 | +| **Mean** | **+23.44** | **1063** | **0.19** | **0.19/11.8** | + +Observations: +- **All 3 runs positive** (major improvement from Phase 3.5 mean of -15.12) +- Agent learned to shoot (~12 shots/episode, ~1.6% accuracy) +- ~0.19 kills per episode on average +- Combat rewards providing clear learning signal + +--- + +## Spawn Direction Fix (FAILED) +Date: 2026-01-13 +Change: Make respawned opponent fly same direction as player instead of always +X + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | -6.91 | 1107 | 0.133 | 0.133/11.0 | +| 2 | -97.46 | 1118 | 0.06 | 0.06/10.8 | +| 3 | -34.51 | 1075 | 0.061 | 0.06/12.4 | +| **Mean** | **-46.29** | **1100** | **0.085** | **0.08/11.4** | + +Observations: +- **Significantly worse than baseline** (-46.29 vs +23.44) +- Predictable +X direction was actually easier to learn +- **REVERTED** - keeping opponent always flies +X + +--- + +## Aiming Reward (SUCCESS) +Date: 2026-01-13 +Change: Add continuous reward for gun cone alignment (tracking bonus + firing solution bonus) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +63.08 | 1067 | 
0.51 | 0.51/10.2 | +| 2 | +12.64 | 1127 | 0.21 | 0.21/10.2 | +| 3 | +35.41 | 1113 | 0.37 | 0.37/9.9 | +| **Mean** | **+37.04** | **1102** | **0.36** | **0.36/10.1** | + +Observations: +- **+58% improvement in return** (+23.44 → +37.04) +- **+89% improvement in kills** (0.19 → 0.36) +- **+125% improvement in accuracy** (1.6% → 3.6%) +- Aiming reward provides gradient for learning to aim, not just fire + +--- + +## Physics Refactor (3582d2d4) - Pre-Quaternion Fix +Date: 2026-01-13 +Commit: 3582d2d4 "Physics in Own File - Test Flights" +Change: Moved physics to flightlib.h, added test_flight.py validation tests + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +45.32 | 1139 | 0.42 | 0.42/10.2 | +| 2 | +15.30 | 1136 | 0.19 | 0.19/10.2 | +| 3 | +51.87 | 1133 | 0.46 | 0.46/10.0 | +| **Mean** | **+37.50** | **1136** | **0.36** | **0.36/10.1** | + +Observations: +- Performance consistent with Aiming Reward baseline (+37.04 → +37.50) +- Physics refactor did not affect training +- test_flight.py shows climb_rate test failing (-29.6 vs +15.4 expected) +- Quaternion sign issue identified in test setup (not affecting training) + +--- + +## Coordinated Turn Tests (1c30c546) +Date: 2026-01-14 +Commit: 1c30c546 "Coordinated Turn Tests" +Change: Fixed quaternion signs in tests, added 60° coordinated turn test with PID validation (97% efficiency) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +26.17 | 1151 | 0.29 | 0.29/10.5 | +| 2 | +55.99 | 1148 | 0.47 | 0.47/10.6 | +| 3 | +10.82 | 1151 | 0.20 | 0.20/9.6 | +| **Mean** | **+30.99** | **1150** | **0.32** | **0.32/10.2** | + +Observations: +- Performance consistent with previous baseline (+37.50 → +30.99, within variance) +- Test fixes did not affect training (physics unchanged) +- All tests now passing: max_speed, stall, climb, glide, turn_30, turn_60, pitch, roll + +--- + +## Performance Optimizations (374871df) +Date: 2026-01-14 +Commit: 374871df +Change: Replace divisions with multiplications by inverse constants; precompute gun cone cosf() per episode + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +44.39 | 1128 | 0.40 | 0.40/10.1 | +| 2 | +37.43 | 1139 | 0.34 | 0.34/9.9 | +| 3 | +45.54 | 1128 | 0.40 | 0.40/11.2 | +| **Mean** | **+42.45** | **1132** | **0.38** | **0.38/10.4** | + +Observations: +- **+37% improvement over previous baseline** (+30.99 → +42.45) +- 21 divisions replaced with multiplications (2.3x faster per op) +- Gun cone trig precomputed per episode (curriculum-ready) +- SPS: 1.2-1.3M + +--- + +## Autopilot Infrastructure (85980679) +Date: 2026-01-14 +Commit: 85980679 +Change: Add opponent autopilot system for curriculum learning (not enabled by default) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +36.85 | 1140 | 0.33 | 0.33/9.1 | +| 2 | +55.26 | 1140 | 0.51 | 0.51/11.3 | +| 3 | +12.78 | 1150 | 0.25 | 0.25/10.9 | +| **Mean** | **+34.97** | **1143** | **0.36** | **0.36/10.4** | + +Changes: +- NEW: autopilot.h - 7 autopilot modes (STRAIGHT, LEVEL, TURN_LEFT/RIGHT, CLIMB, DESCEND, RANDOM) +- NEW: set_autopilot() Python API for curriculum learning +- Default: AP_STRAIGHT (identical to previous behavior) +- PID gains from test_flight.py validation + 
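+For reference, the new API is exercised roughly like this (a sketch; the exact
+`AutopilotMode` export on the Python side is assumed, but the keyword arguments
+and defaults mirror env_set_autopilot() in binding.c):
+
+```python
+# Sketch: switch the opponent to randomized maneuvers for curriculum training.
+env.set_autopilot(mode=AutopilotMode.RANDOM, throttle=1.0, bank_deg=30.0, climb_rate=5.0)
+```
+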
+Observations: +- Performance consistent with baseline (+42.45 → +34.97, within variance) +- **No regression** - autopilot infrastructure has negligible overhead +- Autopilot disabled by default (AP_STRAIGHT = old behavior) +- Ready for curriculum: call `env.set_autopilot(mode=AutopilotMode.RANDOM)` to enable + +--- + +## Vectorized set_autopilot (80bcf31e) +Date: 2026-01-14 +Commit: 80bcf31e +Change: Add vec_set_autopilot() C binding; set_autopilot(env_idx=None) sets all envs in one call + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +45.37 | 1153 | 0.47 | 0.47/10.6 | +| 2 | +51.04 | 1140 | 0.46 | 0.46/11.2 | +| 3 | +37.00 | 1110 | 0.35 | 0.35/10.8 | +| **Mean** | **+44.47** | **1134** | **0.43** | **0.43/10.9** | + +Changes: +- binding.c: Added vec_set_autopilot() for batch autopilot configuration +- dogfight.py: set_autopilot(env_idx=None) now sets all envs in one C call + +Observations: +- Performance consistent with baseline (+34.97 → +44.47, within variance) +- **No regression** - vectorized API adds no overhead during training +- Unblocks multi-env curriculum learning (no more N Python->C calls) + +--- + +## Mode Weights for Curriculum (0a1c2e6d) +Date: 2026-01-14 +Commit: 0a1c2e6d +Change: Add weighted random mode selection for curriculum learning + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +41.15 | 1139 | 0.36 | 0.36/11.0 | +| 2 | +53.82 | 1149 | 0.46 | 0.46/7.8 | +| 3 | +52.25 | 1133 | 0.45 | 0.45/10.2 | +| **Mean** | **+49.07** | **1140** | **0.42** | **0.42/9.7** | + +Changes: +- autopilot.h: Added mode_weights[AP_COUNT] array, weighted random selection in autopilot_randomize() +- autopilot.h: Added separate LCG RNG (rng_state) to avoid srand() interference from vec_reset +- binding.c: Added vec_set_mode_weights(), env_get_autopilot_mode() bindings +- dogfight.py: Added set_mode_weights(), get_autopilot_mode() methods +- test_flight.py: Added test_mode_weights() unit test + +Observations: +- Performance consistent with baseline (+44.47 → +49.07, within variance) +- **No regression** - mode weights infrastructure has negligible overhead +- Fixed RNG bug: autopilot now uses own LCG instead of shared rand() (was always selecting same mode) +- Ready for curriculum: `env.set_mode_weights(level=0.8, turn_left=0.1, turn_right=0.1)` to bias easy modes + +--- + +## Observation Scheme Sweep + +### Scheme 0: WORLD_FRAME (Baseline) +Date: 2026-01-14 +Config: obs_scheme = 0 +Observations: 19 (player pos/vel/ori/up + world-frame rel_pos/vel) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +18.22 | 1128 | 0.20 | 0.20/10.1 | +| 2 | +19.71 | 1135 | 0.23 | 0.23/10.5 | +| 3 | +52.98 | 1139 | 0.46 | 0.46/8.0 | +| **Mean** | **+30.30** | **1134** | **0.30** | **0.30/9.5** | + +Observations: +- High variance between runs (18-53 return) +- Baseline for comparison with body-frame and angles schemes + +--- + +### Scheme 1: BODY_FRAME +Date: 2026-01-14 +Config: obs_scheme = 1 +Observations: 21 (body-frame rel_pos/vel + aim_dot + dist_norm) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +50.12 | 1205 | 0.14 | 0.14/1.2 | +| 2 | +21.95 | 1292 | 0.02 | 0.02/0.5 | +| 3 | -5.26 | 1258 | 0.19 | 0.19/8.4 | +| 
**Mean** | **+22.27** | **1252** | **0.12** | **0.12/3.4** | + +Observations: +- **Worse than WORLD_FRAME** (+22.27 vs +30.30) +- Agent fires much less often (3.4 shots vs 9.5) +- Fewer kills despite aim helpers (0.12 vs 0.30) +- Higher variance - body-frame transform may confuse learning + +--- + +### Scheme 2: ANGLES +Date: 2026-01-14 +Config: obs_scheme = 2 +Observations: 12 (pos + speed + euler angles + azimuth/elevation/dist + closing_rate + opp_heading) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +163.78 | 1198 | 0.01 | 0.01/0.02 | +| 2 | +71.56 | 1298 | 0.31 | 0.31/4.9 | +| 3 | +151.36 | 1263 | 0.01 | 0.01/0.06 | +| **Mean** | **+128.90** | **1253** | **0.11** | **0.11/1.7** | + +Observations: +- **Highest return** but misleading - agent exploits pursuit shaping without shooting +- 2 of 3 runs learned to not fire at all (0.02 and 0.06 shots) +- Only run 2 learned combat (0.31 kills) +- Smaller obs space (12) may lack info needed to learn trigger timing + +--- + +### Observation Scheme Summary + +| Scheme | Obs Size | Mean Return | Mean Kills | Shots/Ep | Notes | +|--------|----------|-------------|------------|----------|-------| +| 0: WORLD_FRAME | 19 | +30.30 | 0.30 | 9.5 | **Best combat learning** | +| 1: BODY_FRAME | 21 | +22.27 | 0.12 | 3.4 | Worse than baseline | +| 2: ANGLES | 12 | +128.90 | 0.11 | 1.7 | Exploits pursuit reward | + +--- + +### Scheme 3: CONTROL_ERROR +Date: 2026-01-14 +Config: obs_scheme = 3 +Observations: 17 (player state + pitch/yaw/roll errors to target + closing_rate + opp_heading) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +165.98 | 1233 | 0.00 | 0.00/0.00 | +| 2 | +167.76 | 1238 | 0.00 | 0.00/0.00 | +| 3 | +165.45 | 1245 | 0.00 | 0.00/0.01 | +| **Mean** | **+166.40** | **1239** | **0.00** | **0.00/0.00** | + +Observations: +- **Highest return** but completely exploits pursuit reward +- Agent learned to not fire at all (0 shots across all runs) +- Control error obs may be too "solved" - agent just follows target + +--- + +### Scheme 4: REALISTIC +Date: 2026-01-14 +Config: obs_scheme = 4 +Observations: 10 (airspeed/altitude/pitch/roll + gunsight az/el/size + aspect/horizon/dist) + +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +174.53 | 1159 | 0.00 | 0.00/0.01 | +| 2 | +171.96 | 1174 | 0.00 | 0.00/0.01 | +| 3 | +158.68 | 1252 | 0.00 | 0.00/0.01 | +| **Mean** | **+168.39** | **1195** | **0.00** | **0.00/0.01** | + +Observations: +- **Very high return** but no combat at all +- Smallest network (2.2K params) but same exploitation pattern +- Missing world position may prevent learning proper pursuit + +--- + +### Scheme 5: MAXIMALIST +Date: 2026-01-14 +Config: obs_scheme = 5 +Observations: 43 (everything: world+body velocities, quaternion+euler, world+body rel_pos/vel, angles, etc.) 
+ +| Run | Episode Return | Episode Length | Kills | Shots Hit/Fired | +|-----|----------------|----------------|-------|-----------------| +| 1 | +90.95 | 1279 | 0.04 | 0.04/0.10 | +| 2 | +66.94 | 1167 | 0.31 | 0.31/2.1 | +| 3 | +92.29 | 1219 | 0.04 | 0.04/0.11 | +| **Mean** | **+83.39** | **1222** | **0.13** | **0.13/0.8** | + +Observations: +- Run 2 learned combat (0.31 kills) - only non-WORLD_FRAME scheme to do so reliably +- Lower return than pursuit-exploiting schemes but more combat +- Largest network (6.4K params) - may need more training time + +--- + +### Final Observation Scheme Summary + +| Scheme | Obs Size | Params | Mean Return | Mean Kills | Shots/Ep | Combat? | +|--------|----------|--------|-------------|------------|----------|---------| +| 0: WORLD_FRAME | 19 | 3.3K | +30.30 | **0.30** | 9.5 | **YES** | +| 1: BODY_FRAME | 21 | 3.6K | +22.27 | 0.12 | 3.4 | Weak | +| 2: ANGLES | 12 | 2.4K | +128.90 | 0.11 | 1.7 | No | +| 3: CONTROL_ERROR | 17 | 3.1K | +166.40 | 0.00 | 0.0 | No | +| 4: REALISTIC | 10 | 2.2K | +168.39 | 0.00 | 0.0 | No | +| 5: MAXIMALIST | 43 | 6.4K | +83.39 | 0.13 | 0.8 | 1/3 runs | + +**Conclusion:** WORLD_FRAME (scheme 0) is the best observation representation for learning combat: +- Only scheme where all 3 runs learned to fire consistently +- Best kill rate (0.30 kills/episode) +- The "engineered" schemes (ANGLES, CONTROL_ERROR, REALISTIC) all exploit pursuit reward without learning to shoot +- MAXIMALIST occasionally learns combat but inconsistently + +**Insight:** The pursuit reward shaping is too strong relative to kill rewards. Agents can achieve high return just by chasing without ever firing. The world-frame observations may make it harder to exploit this pattern because the agent can't "solve" pursuit as cleanly. + +--- + +## Sweepable Rewards v2 (04dd0167) +Date: 2026-01-14 +Commit: 04dd0167 +Change: Reward system overhaul - kill reward 10.0→1.0, all rewards sweepable via INI, perf metric for kill rate + +| Run | Episode Return | Episode Length | Kills | Perf | Accuracy | +|-----|----------------|----------------|-------|------|----------| +| 1 | -0.28 | 1155 | 1.18 | 0.706 | 1.3% | +| 2 | +43.03 | 1159 | 3.87 | 0.963 | 5.2% | +| 3 | +43.91 | 1152 | 5.57 | 0.988 | 6.3% | +| **Mean** | **+28.89** | **1155** | **3.54** | **0.886** | **4.3%** | + +Changes: +- Kill reward: 10.0 → 1.0 (fixed, not swept) +- Hit reward: 1.0 → 0.5 (sweepable) +- All shaping rewards now configurable via INI +- NEW: `perf` metric = fraction of episodes with kills (0.0-1.0) +- Episode-level kill/shot tracking + +**Comparison with Previous Best (Phase 5 Combat):** + +| Metric | Old (kill=10) | New (kill=1) | Change | +|--------|---------------|--------------|--------| +| Kills/ep | 0.19 | 3.54 | **+1763%** | +| Accuracy | 1.6% | 4.3% | **+169%** | +| Return | +23.44 | +28.89 | +23% | + +**Observations:** +- **Massive improvement in kills** - 18x more kills per episode +- 2/3 runs learned strong combat (perf > 0.96) +- Run 1 weaker but still learned some shooting +- Lower kill reward (1.0 vs 10.0) paradoxically improved learning +- Simpler reward signal easier to optimize + +--- + +## Terminal on Kill (a31d1dc7) +Date: 2026-01-15 +Commit: a31d1dc7 + +### Major Changes +1. **Terminal on kill**: Episode ends immediately when player kills opponent (was: respawn and continue) +2. **Binary perf metric**: `perf = env->kill ? 1.0 : 0.0` per episode (was: cumulative kills / episodes) +3. 
**Simplified Log struct**: Removed `kills`, `deaths`, `shots_hit` (redundant with terminal-on-kill) +4. **Kill flag on env**: Added `env->kill` to track success per episode +5. **Score = terminal reward**: `score = rewards[0]` (1.0 on kill, 0.0 on failure) + +### Results + +| Run | Episode Return | Episode Length | Perf | Shots/Ep | +|-----|----------------|----------------|------|----------| +| 1 | +120.65 | 1102 | 0.002 | 0.01 | +| 2 | +121.18 | 1264 | 0.002 | 0.01 | +| 3 | +76.90 | 1232 | 0.009 | 0.04 | +| **Mean** | **+106.24** | **1199** | **0.004** | **0.02** | + +### Comparison with Previous (Sweepable Rewards v2) + +| Metric | Before | After | Change | +|--------|--------|-------|--------| +| Perf | 0.886 | 0.004 | -99.5% | +| Shots/ep | 74.4 | 0.02 | -99.97% | +| Episode Return | +28.89 | +106.24 | +268% | + +### Observations +- Agent learned to NOT fire - perf dropped from 88.6% to 0.4% +- Very few shots fired (0.01-0.04 per episode vs 74 before) +- High return comes from pursuit shaping, not combat +- Needs investigation: DEBUG printf, check miss penalty, verify shots_fired tracking diff --git a/pufferlib/ocean/dogfight/binding.c b/pufferlib/ocean/dogfight/binding.c new file mode 100644 index 000000000..4c91a1a67 --- /dev/null +++ b/pufferlib/ocean/dogfight/binding.c @@ -0,0 +1,301 @@ +#include "dogfight.h" + +#define Env Dogfight + +// We need Python.h for the forward declaration, but env_binding.h includes it +// So we'll put the forward decl and MY_METHODS after including env_binding.h +// but we need MY_METHODS defined before... Let's restructure. + +// Include Python first to get PyObject type +#include + +// Forward declare our custom methods +static PyObject* env_force_state(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* env_set_autopilot(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* vec_set_autopilot(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* vec_set_mode_weights(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* env_get_autopilot_mode(PyObject* self, PyObject* args); +static PyObject* env_get_state(PyObject* self, PyObject* args); + +// Register custom methods before including the template +#define MY_METHODS \ + {"env_force_state", (PyCFunction)env_force_state, METH_VARARGS | METH_KEYWORDS, "Force environment state"}, \ + {"env_set_autopilot", (PyCFunction)env_set_autopilot, METH_VARARGS | METH_KEYWORDS, "Set opponent autopilot mode"}, \ + {"vec_set_autopilot", (PyCFunction)vec_set_autopilot, METH_VARARGS | METH_KEYWORDS, "Set autopilot for all envs"}, \ + {"vec_set_mode_weights", (PyCFunction)vec_set_mode_weights, METH_VARARGS | METH_KEYWORDS, "Set mode weights for all envs"}, \ + {"env_get_autopilot_mode", (PyCFunction)env_get_autopilot_mode, METH_VARARGS, "Get current autopilot mode"}, \ + {"env_get_state", (PyCFunction)env_get_state, METH_VARARGS, "Get raw player state"} + +// Helper to get float from kwargs with default (before env_binding.h since my_init uses it) +static float get_float(PyObject *kwargs, const char *key, float default_val) { + if (!kwargs) return default_val; + PyObject *val = PyDict_GetItemString(kwargs, key); + if (!val) return default_val; + if (PyFloat_Check(val)) return (float)PyFloat_AsDouble(val); + if (PyLong_Check(val)) return (float)PyLong_AsLong(val); + return default_val; +} + +// Helper to get int from kwargs with default +static int get_int(PyObject *kwargs, const char *key, int default_val) { + if (!kwargs) return default_val; + PyObject *val = 
PyDict_GetItemString(kwargs, key); + if (!val) return default_val; + if (PyLong_Check(val)) return (int)PyLong_AsLong(val); + if (PyFloat_Check(val)) return (int)PyFloat_AsDouble(val); + return default_val; +} + +#include "../env_binding.h" + +static int my_init(Env *env, PyObject *args, PyObject *kwargs) { + env->max_steps = unpack(kwargs, "max_steps"); + int obs_scheme = get_int(kwargs, "obs_scheme", 0); // Default to world frame + + // Build reward config from kwargs (all sweepable via INI) + RewardConfig rcfg = { + .closing_scale = get_float(kwargs, "reward_closing_scale", 0.002f), + .tail_scale = get_float(kwargs, "reward_tail_scale", 0.005f), + .tracking = get_float(kwargs, "reward_tracking", 0.05f), + .firing_solution = get_float(kwargs, "reward_firing_solution", 0.1f), + .stall = get_float(kwargs, "penalty_stall", 0.002f), + .roll = get_float(kwargs, "penalty_roll", 0.0001f), + .neg_g = get_float(kwargs, "penalty_neg_g", 0.002f), + .rudder = get_float(kwargs, "penalty_rudder", 0.0002f), + .aileron = get_float(kwargs, "penalty_aileron", 0.015f), + .bias = get_float(kwargs, "penalty_bias", 0.01f), + .approach = get_float(kwargs, "reward_approach", 0.005f), + .level = get_float(kwargs, "reward_level", 0.02f), + .alt_max = get_float(kwargs, "alt_max", 2500.0f), + .speed_min = get_float(kwargs, "speed_min", 50.0f), + }; + + // Curriculum learning params + int curriculum_enabled = get_int(kwargs, "curriculum_enabled", 0); + int curriculum_randomize = get_int(kwargs, "curriculum_randomize", 0); + int episodes_per_stage = get_int(kwargs, "episodes_per_stage", 15000); + + // Aim cone annealing params (reward shaping curriculum) + float aim_cone_start = get_float(kwargs, "aim_cone_start", 0.35f); // 20° in radians + float aim_cone_end = get_float(kwargs, "aim_cone_end", 0.087f); // 5° in radians + int aim_anneal_episodes = get_int(kwargs, "aim_anneal_episodes", 50000); + + int env_num = get_int(kwargs, "env_num", 0); + + init(env, obs_scheme, &rcfg, curriculum_enabled, curriculum_randomize, episodes_per_stage, aim_cone_start, aim_cone_end, aim_anneal_episodes, env_num); + return 0; +} + +static int my_log(PyObject *dict, Log *log) { + assign_to_dict(dict, "episode_return", log->episode_return); + assign_to_dict(dict, "episode_length", log->episode_length); + assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "perf", log->perf); + assign_to_dict(dict, "kills", log->kills); + assign_to_dict(dict, "shots_fired", log->shots_fired); + assign_to_dict(dict, "accuracy", log->accuracy); + assign_to_dict(dict, "stage", log->stage); + assign_to_dict(dict, "total_stage_weight", log->total_stage_weight); + assign_to_dict(dict, "avg_stage_weight", log->avg_stage_weight); + assign_to_dict(dict, "avg_abs_bias", log->avg_abs_bias); + assign_to_dict(dict, "ultimate", log->ultimate); + assign_to_dict(dict, "n", log->n); + return 0; +} + +// Force state wrapper - unpacks kwargs and calls C function +static PyObject* env_force_state(PyObject* self, PyObject* args, PyObject* kwargs) { + // First arg is env handle + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "env_force_state requires 1 positional arg (env handle)"); + return NULL; + } + + Env* env = unpack_env(args); + if (!env) return NULL; + + // Extract all parameters with defaults + // Player position + float p_px = get_float(kwargs, "p_px", 0.0f); + float p_py = get_float(kwargs, "p_py", 0.0f); + float p_pz = get_float(kwargs, "p_pz", 1000.0f); + + // Player velocity + float p_vx = get_float(kwargs, "p_vx", 150.0f); + 
float p_vy = get_float(kwargs, "p_vy", 0.0f); + float p_vz = get_float(kwargs, "p_vz", 0.0f); + + // Player orientation (identity quat = wings level, flying +X) + float p_ow = get_float(kwargs, "p_ow", 1.0f); + float p_ox = get_float(kwargs, "p_ox", 0.0f); + float p_oy = get_float(kwargs, "p_oy", 0.0f); + float p_oz = get_float(kwargs, "p_oz", 0.0f); + + // Player throttle + float p_throttle = get_float(kwargs, "p_throttle", 1.0f); + + // Opponent position (-9999 = auto: 400m ahead) + float o_px = get_float(kwargs, "o_px", -9999.0f); + float o_py = get_float(kwargs, "o_py", -9999.0f); + float o_pz = get_float(kwargs, "o_pz", -9999.0f); + + // Opponent velocity (-9999 = auto: match player) + float o_vx = get_float(kwargs, "o_vx", -9999.0f); + float o_vy = get_float(kwargs, "o_vy", -9999.0f); + float o_vz = get_float(kwargs, "o_vz", -9999.0f); + + // Opponent orientation (-9999 = auto: match player) + float o_ow = get_float(kwargs, "o_ow", -9999.0f); + float o_ox = get_float(kwargs, "o_ox", -9999.0f); + float o_oy = get_float(kwargs, "o_oy", -9999.0f); + float o_oz = get_float(kwargs, "o_oz", -9999.0f); + + // Environment tick + int tick = get_int(kwargs, "tick", 0); + + // Call the C function + force_state(env, + p_px, p_py, p_pz, + p_vx, p_vy, p_vz, + p_ow, p_ox, p_oy, p_oz, + p_throttle, + o_px, o_py, o_pz, + o_vx, o_vy, o_vz, + o_ow, o_ox, o_oy, o_oz, + tick + ); + + Py_RETURN_NONE; +} + +// Set autopilot mode for opponent aircraft +static PyObject* env_set_autopilot(PyObject* self, PyObject* args, PyObject* kwargs) { + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "env_set_autopilot requires 1 positional arg (env handle)"); + return NULL; + } + + Env* env = unpack_env(args); + if (!env) return NULL; + + // Get autopilot parameters + int mode = get_int(kwargs, "mode", AP_STRAIGHT); + float throttle = get_float(kwargs, "throttle", AP_DEFAULT_THROTTLE); + float bank_deg = get_float(kwargs, "bank_deg", AP_DEFAULT_BANK_DEG); + float climb_rate = get_float(kwargs, "climb_rate", AP_DEFAULT_CLIMB_RATE); + + // Set the autopilot mode + autopilot_set_mode(&env->opponent_ap, (AutopilotMode)mode, throttle, bank_deg, climb_rate); + + Py_RETURN_NONE; +} + +// Set autopilot mode for all environments (vectorized) +static PyObject* vec_set_autopilot(PyObject* self, PyObject* args, PyObject* kwargs) { + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "vec_set_autopilot requires 1 positional arg (vec handle)"); + return NULL; + } + + VecEnv* vec = unpack_vecenv(args); + if (!vec) return NULL; + + // Get autopilot parameters + int mode = get_int(kwargs, "mode", AP_STRAIGHT); + float throttle = get_float(kwargs, "throttle", AP_DEFAULT_THROTTLE); + float bank_deg = get_float(kwargs, "bank_deg", AP_DEFAULT_BANK_DEG); + float climb_rate = get_float(kwargs, "climb_rate", AP_DEFAULT_CLIMB_RATE); + + // Set autopilot for all environments + for (int i = 0; i < vec->num_envs; i++) { + autopilot_set_mode(&vec->envs[i]->opponent_ap, (AutopilotMode)mode, + throttle, bank_deg, climb_rate); + } + + Py_RETURN_NONE; +} + +// Set mode weights for curriculum learning (vectorized) +static PyObject* vec_set_mode_weights(PyObject* self, PyObject* args, PyObject* kwargs) { + if (PyTuple_Size(args) != 1) { + PyErr_SetString(PyExc_TypeError, "vec_set_mode_weights requires 1 positional arg (vec handle)"); + return NULL; + } + + VecEnv* vec = unpack_vecenv(args); + if (!vec) return NULL; + + // Get weights for each mode (default 0.2 each for modes 1-5) + float w_level = get_float(kwargs, 
"level", 0.2f); + float w_turn_left = get_float(kwargs, "turn_left", 0.2f); + float w_turn_right = get_float(kwargs, "turn_right", 0.2f); + float w_climb = get_float(kwargs, "climb", 0.2f); + float w_descend = get_float(kwargs, "descend", 0.2f); + + // Set weights for all environments + for (int i = 0; i < vec->num_envs; i++) { + AutopilotState* ap = &vec->envs[i]->opponent_ap; + ap->mode_weights[AP_LEVEL] = w_level; + ap->mode_weights[AP_TURN_LEFT] = w_turn_left; + ap->mode_weights[AP_TURN_RIGHT] = w_turn_right; + ap->mode_weights[AP_CLIMB] = w_climb; + ap->mode_weights[AP_DESCEND] = w_descend; + } + + Py_RETURN_NONE; +} + +// Get current autopilot mode (for testing/debugging) +static PyObject* env_get_autopilot_mode(PyObject* self, PyObject* args) { + Env* env = unpack_env(args); + if (!env) return NULL; + + return PyLong_FromLong((long)env->opponent_ap.mode); +} + +// Get raw player state (for physics tests - independent of obs_scheme) +static PyObject* env_get_state(PyObject* self, PyObject* args) { + Env* env = unpack_env(args); + if (!env) return NULL; + + Plane* p = &env->player; + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + + PyObject* dict = PyDict_New(); + if (!dict) return NULL; + + // Position + PyDict_SetItemString(dict, "px", PyFloat_FromDouble(p->pos.x)); + PyDict_SetItemString(dict, "py", PyFloat_FromDouble(p->pos.y)); + PyDict_SetItemString(dict, "pz", PyFloat_FromDouble(p->pos.z)); + + // Velocity + PyDict_SetItemString(dict, "vx", PyFloat_FromDouble(p->vel.x)); + PyDict_SetItemString(dict, "vy", PyFloat_FromDouble(p->vel.y)); + PyDict_SetItemString(dict, "vz", PyFloat_FromDouble(p->vel.z)); + + // Orientation quaternion + PyDict_SetItemString(dict, "ow", PyFloat_FromDouble(p->ori.w)); + PyDict_SetItemString(dict, "ox", PyFloat_FromDouble(p->ori.x)); + PyDict_SetItemString(dict, "oy", PyFloat_FromDouble(p->ori.y)); + PyDict_SetItemString(dict, "oz", PyFloat_FromDouble(p->ori.z)); + + // Up vector (derived) + PyDict_SetItemString(dict, "up_x", PyFloat_FromDouble(up.x)); + PyDict_SetItemString(dict, "up_y", PyFloat_FromDouble(up.y)); + PyDict_SetItemString(dict, "up_z", PyFloat_FromDouble(up.z)); + + // Forward vector (derived) + PyDict_SetItemString(dict, "fwd_x", PyFloat_FromDouble(fwd.x)); + PyDict_SetItemString(dict, "fwd_y", PyFloat_FromDouble(fwd.y)); + PyDict_SetItemString(dict, "fwd_z", PyFloat_FromDouble(fwd.z)); + + // Throttle + PyDict_SetItemString(dict, "throttle", PyFloat_FromDouble(p->throttle)); + + // G-force (current G-loading) + PyDict_SetItemString(dict, "g_force", PyFloat_FromDouble(p->g_force)); + + return dict; +} diff --git a/pufferlib/ocean/dogfight/dogfight.h b/pufferlib/ocean/dogfight/dogfight.h new file mode 100644 index 000000000..2f6b216bf --- /dev/null +++ b/pufferlib/ocean/dogfight/dogfight.h @@ -0,0 +1,1580 @@ +// dogfight.h - WW2 aerial combat environment +// Uses flightlib.h for flight physics + +#include +#include +#include +#include +#include + +#include "raylib.h" +#include "rlgl.h" // For rlSetClipPlanes() + +// Define DEBUG before including flightlib.h so physics functions can use it +#define DEBUG 0 + +#include "flightlib.h" +#include "autopilot.h" + +// Observation scheme enumeration +typedef enum { + OBS_ANGLES = 0, // Spherical coordinates (12 obs) + OBS_CONTROL_ERROR = 1, // Control errors to target (17 obs) + OBS_REALISTIC = 2, // Cockpit instruments only (10 obs) + OBS_REALISTIC_RANGE = 3, // REALISTIC with explicit range (10 obs) + OBS_REALISTIC_ENEMY_STATE = 4, // + enemy 
pitch/roll/heading (13 obs) + OBS_REALISTIC_FULL = 5, // + turn rate + G-loading (15 obs) + OBS_SCHEME_COUNT +} ObsScheme; + +// Observation size lookup table +static const int OBS_SIZES[OBS_SCHEME_COUNT] = {12, 17, 10, 10, 13, 15}; + +// Curriculum learning stages (progressive difficulty) +// Reordered 2026-01-18: moved CROSSING from stage 2 to stage 6 (see CURRICULUM_PLANS.md) +typedef enum { + CURRICULUM_TAIL_CHASE = 0, // Easiest: opponent ahead, same heading + CURRICULUM_HEAD_ON, // Opponent coming toward us + CURRICULUM_VERTICAL, // Above or below player (was stage 3) + CURRICULUM_MANEUVERING, // Opponent does turns (was stage 4) + CURRICULUM_FULL_RANDOM, // Mix of all basic modes (was stage 5) + CURRICULUM_HARD_MANEUVERING, // Hard turns + weave patterns (was stage 6) + CURRICULUM_CROSSING, // 45 degree deflection shots (was stage 2, reduced from 90°) + CURRICULUM_EVASIVE, // Reactive evasion (hardest) + CURRICULUM_COUNT +} CurriculumStage; + +// Stage difficulty weights for composite metric (higher = harder = more valuable) +// Used to compute difficulty_weighted_perf = perf * avg_stage_weight +// Reordered 2026-01-18 to match new enum order (see CURRICULUM_PLANS.md) +static const float STAGE_WEIGHTS[CURRICULUM_COUNT] = { + 0.2f, // TAIL_CHASE - trivial + 0.3f, // HEAD_ON - easy + 0.4f, // VERTICAL - medium (was stage 3) + 0.5f, // MANEUVERING - medium (was stage 4) + 0.65f, // FULL_RANDOM - medium-hard (was stage 5) + 0.8f, // HARD_MANEUVERING - hard (was stage 6) + 0.9f, // CROSSING - hard, 45° deflection (was stage 2) + 1.0f // EVASIVE - hardest +}; + +// Simulation timing +#define DT 0.02f + +// World bounds +#define WORLD_HALF_X 2000.0f +#define WORLD_HALF_Y 2000.0f +#define WORLD_MAX_Z 3000.0f +#define MAX_SPEED 250.0f +#define OBS_SIZE 19 // player(13) + rel_pos(3) + rel_vel(3) + +// Inverse constants for faster normalization (multiply instead of divide) +#define INV_WORLD_HALF_X 0.0005f // 1/2000 +#define INV_WORLD_HALF_Y 0.0005f // 1/2000 +#define INV_WORLD_MAX_Z 0.000333333f // 1/3000 +#define INV_MAX_SPEED 0.004f // 1/250 + +// Combat constants +#define GUN_RANGE 500.0f // meters +#define GUN_CONE_ANGLE 0.087f // ~5 degrees in radians +#define FIRE_COOLDOWN 10 // ticks (0.2 seconds at 50Hz) + +typedef struct Log { + float episode_return; + float episode_length; + float score; // 1.0 on kill, 0.0 on failure + float perf; // sweep metric (same as kills) + float kills; // cumulative kills + float shots_fired; // cumulative shots + float accuracy; // kills / shots_fired * 100 + float stage; // current curriculum stage (for monitoring) + // Curriculum-weighted metrics (Phase 1) + float total_stage_weight; // Sum of stage weights across all episodes + float avg_stage_weight; // total_stage_weight / n + float total_abs_bias; // Sum of |aileron_bias| at episode end + float avg_abs_bias; // total_abs_bias / n + float ultimate; // Main sweep metric: kill_rate * avg_stage_weight / (1 + avg_abs_bias * 0.01) + float n; +} Log; + +// Death reason tracking for diagnostics +typedef enum DeathReason { + DEATH_NONE = 0, // Episode still running + DEATH_KILL = 1, // Player scored a kill (success) + DEATH_OOB = 2, // Out of bounds + DEATH_AILERON = 3, // Aileron limit exceeded + DEATH_TIMEOUT = 4, // Max steps reached + DEATH_SUPERSONIC = 5 // Physics blowup +} DeathReason; + +// Reward configuration (all values sweepable via INI) +typedef struct RewardConfig { + float closing_scale; // +N per m/s closing + float tail_scale; // ±N for tail position + float tracking; // +N when in 2x gun 
cone + float firing_solution; // +N when in 1x gun cone + float stall; // -N per m/s below speed_min + float roll; // -N per radian of bank angle (gentle level preference) + float neg_g; // -N per unit of negative G-loading + float rudder; // -N per unit of rudder magnitude + float aileron; // -N per unit of aileron magnitude (prevents constant rolling) + float bias; // -N per unit of cumulative signed aileron (prevents one-direction lock) + float approach; // +N per meter of distance closed this tick + float level; // +N per tick when approximately level (|bank|<30°, |pitch|<30°) + // Thresholds (not rewards) + float alt_max; // 2500.0 + float speed_min; // 50.0 +} RewardConfig; + +typedef struct Client { + Camera3D camera; + float width; + float height; + // Camera orbit state (for mouse control) + float cam_distance; + float cam_azimuth; + float cam_elevation; + bool is_dragging; + float last_mouse_x; + float last_mouse_y; +} Client; + +typedef struct Dogfight { + float *observations; + float *actions; + float *rewards; + unsigned char *terminals; + Log log; + Client *client; + int tick; + int max_steps; + float episode_return; + Plane player; + Plane opponent; + // Per-episode precomputed values (for curriculum learning) + float gun_cone_angle; // Hit detection cone (radians) - FIXED at 5° + float cos_gun_cone; // cosf(gun_cone_angle) - for hit detection + float cos_gun_cone_2x; // cosf(gun_cone_angle * 2) + // Reward shaping cone (anneals from large to small) + float reward_cone_angle; // Current reward cone (radians) - anneals + float cos_reward_cone; // cosf(reward_cone_angle) + float cos_reward_cone_2x; // cosf(reward_cone_angle * 2) + // Aim cone annealing parameters + float aim_cone_start; // Starting reward cone (radians, e.g., 20° = 0.35) + float aim_cone_end; // Ending reward cone (radians, e.g., 5° = 0.087) + int aim_anneal_episodes; // Episodes to fully anneal + // Opponent autopilot + AutopilotState opponent_ap; + // Observation scheme + int obs_scheme; + int obs_size; + // Reward configuration (sweepable) + RewardConfig rcfg; + // Episode-level tracking (reset each episode) + int kill; // 1 if killed this episode, 0 otherwise + float episode_shots_fired; // For accuracy tracking + // Curriculum learning + int curriculum_enabled; // 0 = off (legacy spawning), 1 = on + int curriculum_randomize; // 0 = progressive (training), 1 = random stage each episode (eval) + int episodes_per_stage; // Episodes before advancing to next stage + int total_episodes; // Cumulative episodes (persists across resets) + CurriculumStage stage; // Current difficulty stage + // Anti-spinning + float total_aileron_usage; // Accumulated |aileron| input (for spin death) + float aileron_bias; // Cumulative signed aileron (for directional penalty) + float prev_dist; // Previous distance to opponent (for approach reward) + // Episode reward accumulators (for DEBUG summaries) + float sum_r_approach; + float sum_r_closing; + float sum_r_tail; + float sum_r_speed; + float sum_r_roll; + float sum_r_neg_g; + float sum_r_rudder; + float sum_r_aileron; + float sum_r_bias; + float sum_r_level; + float sum_r_aim; + // Aiming diagnostics (reset each episode, for DEBUG output) + float best_aim_angle; // Best (smallest) aim angle achieved (radians) + int ticks_in_cone; // Ticks where aim_dot > cos_reward_cone + float closest_dist; // Closest approach to target (meters) + // Flight envelope diagnostics (reset each episode, for DEBUG output) + float max_g, min_g; // Peak G-forces experienced + float max_bank; // Peak 
bank angle (abs, radians) + float max_pitch; // Peak pitch angle (abs, radians) + float min_speed, max_speed; // Speed envelope (m/s) + float min_alt, max_alt; // Altitude envelope (m) + float sum_throttle; // For computing mean throttle + int trigger_pulls; // Times trigger was pulled (>0.5) + int prev_trigger; // For edge detection + DeathReason death_reason; + // Debug + int env_num; // Environment index (for filtering debug output) +} Dogfight; + +void init(Dogfight *env, int obs_scheme, RewardConfig *rcfg, int curriculum_enabled, int curriculum_randomize, int episodes_per_stage, float aim_cone_start, float aim_cone_end, int aim_anneal_episodes, int env_num) { + env->log = (Log){0}; + env->tick = 0; + env->env_num = env_num; + env->episode_return = 0.0f; + env->client = NULL; + // Observation scheme + env->obs_scheme = (obs_scheme >= 0 && obs_scheme < OBS_SCHEME_COUNT) ? obs_scheme : 0; + env->obs_size = OBS_SIZES[env->obs_scheme]; + // Gun cone for HIT DETECTION - fixed at 5° + env->gun_cone_angle = GUN_CONE_ANGLE; + env->cos_gun_cone = cosf(env->gun_cone_angle); + env->cos_gun_cone_2x = cosf(env->gun_cone_angle * 2.0f); + // Aim cone annealing for REWARD SHAPING + env->aim_cone_start = aim_cone_start > 0.0f ? aim_cone_start : 0.35f; // Default 20° + env->aim_cone_end = aim_cone_end > 0.0f ? aim_cone_end : GUN_CONE_ANGLE; // Default 5° + env->aim_anneal_episodes = aim_anneal_episodes > 0 ? aim_anneal_episodes : 50000; + // Initialize reward cone to start value + env->reward_cone_angle = env->aim_cone_start; + env->cos_reward_cone = cosf(env->reward_cone_angle); + env->cos_reward_cone_2x = cosf(env->reward_cone_angle * 2.0f); + // Initialize opponent autopilot + autopilot_init(&env->opponent_ap); + // Reward configuration (copy from provided config) + env->rcfg = *rcfg; + // Episode tracking + env->kill = 0; + env->episode_shots_fired = 0.0f; + // Curriculum learning + env->curriculum_enabled = curriculum_enabled; + env->curriculum_randomize = curriculum_randomize; + env->episodes_per_stage = episodes_per_stage > 0 ? episodes_per_stage : 15000; + env->total_episodes = 0; + env->stage = CURRICULUM_TAIL_CHASE; + env->total_aileron_usage = 0.0f; +} + +void add_log(Dogfight *env) { + // Level 1: Episode summary (one line, easy to grep) + if (DEBUG >= 1 && env->env_num == 0) { + const char* death_names[] = {"NONE", "KILL", "OOB", "AILERON", "TIMEOUT", "SUPERSONIC"}; + float mean_ail = env->total_aileron_usage / fmaxf((float)env->tick, 1.0f); + printf("EP tick=%d ret=%.2f death=%s kill=%d stage=%d mean_ail=%.2f bias=%.1f\n", + env->tick, env->episode_return, death_names[env->death_reason], + env->kill, env->stage, mean_ail, env->aileron_bias); + } + + // Level 2: Reward breakdown (which components dominated?) 
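+    // Note on the logging convention (inferred from the guards in this file): DEBUG is
+    // assumed to be a compile-time macro (see the #if DEBUG >= 2 / >= 3 blocks in c_step),
+    // so levels 1-3 add progressively more per-episode detail, level >= 10 traces every
+    // tick, and the env_num == 0 check limits output to a single environment when many
+    // run in parallel.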
+ if (DEBUG >= 2 && env->env_num == 0) { + printf(" SHAPING: approach=%+.2f closing=%+.2f tail=%+.2f level=%+.2f\n", + env->sum_r_approach, env->sum_r_closing, env->sum_r_tail, env->sum_r_level); + printf(" COMBAT: aim=%+.2f\n", env->sum_r_aim); + printf(" PENALTY: speed=%.2f roll=%.2f neg_g=%.2f rudder=%.2f ail=%.2f bias=%.2f\n", + env->sum_r_speed, env->sum_r_roll, env->sum_r_neg_g, + env->sum_r_rudder, env->sum_r_aileron, env->sum_r_bias); + printf(" AIM: best=%.1f° in_cone=%d/%d (%.0f%%) closest=%.0fm\n", + env->best_aim_angle * RAD_TO_DEG, + env->ticks_in_cone, env->tick, + 100.0f * env->ticks_in_cone / fmaxf((float)env->tick, 1.0f), + env->closest_dist); + } + + // Level 3: Flight envelope and control statistics + if (DEBUG >= 3 && env->env_num == 0) { + float mean_throttle = env->sum_throttle / fmaxf((float)env->tick, 1.0f); + printf(" FLIGHT: G=[%+.1f,%+.1f] bank=%.0f° pitch=%.0f° speed=[%.0f,%.0f] alt=[%.0f,%.0f]\n", + env->min_g, env->max_g, + env->max_bank * RAD_TO_DEG, env->max_pitch * RAD_TO_DEG, + env->min_speed, env->max_speed, + env->min_alt, env->max_alt); + printf(" CONTROL: mean_throttle=%.0f%% trigger_pulls=%d shots=%d\n", + mean_throttle * 100.0f, env->trigger_pulls, (int)env->episode_shots_fired); + } + + if (DEBUG >= 10) printf("=== ADD_LOG ===\n"); + if (DEBUG >= 10) printf(" kill=%d, episode_return=%.2f, tick=%d\n", env->kill, env->episode_return, env->tick); + if (DEBUG >= 10) printf(" episode_shots_fired=%.0f, reward=%.2f\n", env->episode_shots_fired, env->rewards[0]); + env->log.episode_return += env->episode_return; + env->log.episode_length += (float)env->tick; + env->log.perf += env->kill ? 1.0f : 0.0f; + env->log.kills += env->kill ? 1.0f : 0.0f; + env->log.score += env->rewards[0]; + env->log.shots_fired += env->episode_shots_fired; + env->log.accuracy = (env->log.shots_fired > 0.0f) ? 
(env->log.kills / env->log.shots_fired * 100.0f) : 0.0f; + env->log.stage = (float)env->stage; // Track curriculum stage + + // Curriculum-weighted metrics (Phase 1) + // Track difficulty faced and compute composite metric + env->log.total_stage_weight += STAGE_WEIGHTS[env->stage]; + env->log.total_abs_bias += fabsf(env->aileron_bias); // Track bias at episode end + env->log.n += 1.0f; + env->log.avg_stage_weight = env->log.total_stage_weight / env->log.n; + env->log.avg_abs_bias = env->log.total_abs_bias / env->log.n; + + // ultimate = kill_rate * stage_weight / (1 + avg_abs_bias * 0.01) + // Rewards killing hard opponents, penalizes degenerate aileron bias + float kill_rate = env->log.kills / env->log.n; + float difficulty_weighted = kill_rate * env->log.avg_stage_weight; + float bias_divisor = 1.0f + env->log.avg_abs_bias * 0.1f; // min 1.0, safe + env->log.ultimate = difficulty_weighted / bias_divisor; + + if (DEBUG >= 10) printf(" log.perf=%.2f, log.shots_fired=%.0f, log.n=%.0f\n", env->log.perf, env->log.shots_fired, env->log.n); +} + +// Scheme 0: Angles observations (spherical coordinates) +void compute_obs_angles(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles from quaternion + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + float yaw = atan2f(2.0f * (p->ori.w * p->ori.z + p->ori.x * p->ori.y), + 1.0f - 2.0f * (p->ori.y * p->ori.y + p->ori.z * p->ori.z)); + + // Target in body frame → spherical + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float azimuth = atan2f(rel_pos_body.y, rel_pos_body.x); // -pi to pi + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float elevation = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); // -pi/2 to pi/2 + + // Closing rate + Vec3 rel_vel = sub3(p->vel, o->vel); + float closing_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Opponent heading relative to player + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 opp_fwd_body = quat_rotate(q_inv, opp_fwd); + float opp_heading = atan2f(opp_fwd_body.y, opp_fwd_body.x); + + int i = 0; + // Player state + env->observations[i++] = p->pos.x * INV_WORLD_HALF_X; + env->observations[i++] = p->pos.y * INV_WORLD_HALF_Y; + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + env->observations[i++] = norm3(p->vel) * INV_MAX_SPEED; // Speed scalar + env->observations[i++] = pitch / PI; // -0.5 to 0.5 + env->observations[i++] = roll / PI; // -1 to 1 + env->observations[i++] = yaw / PI; // -1 to 1 + + // Target angles + env->observations[i++] = azimuth / PI; // -1 to 1 + env->observations[i++] = elevation / (PI * 0.5f); // -1 to 1 + env->observations[i++] = clampf(dist / GUN_RANGE, 0.0f, 4.0f) - 2.0f; // ~[-1,1] + env->observations[i++] = closing_rate * INV_MAX_SPEED; + + // Opponent info + env->observations[i++] = opp_heading / PI; // -1 to 1 + // OBS_SIZE = 12 +} + +// Scheme 3: Control error observations (what inputs would point at target?) 
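+// (This and the remaining schemes reuse the frame convention of the angles scheme above,
+//  body X=forward, Y=right, Z=up per draw_plane_shape: rotating the world-frame displacement
+//  to the target by the conjugate quaternion q_inv gives the target in body coordinates, so
+//  azimuth = atan2(y_body, x_body) and elevation = atan2(z_body, sqrt(x_body^2 + y_body^2)).
+//  E.g. a target dead ahead and slightly above reads azimuth ~0 with a small positive
+//  elevation; one off the right wingtip reads azimuth ~ +pi/2.)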
+void compute_obs_control_error(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Up vector (world frame) + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + + // Target in body frame + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + Vec3 to_target_norm = normalize3(rel_pos_body); + + // Control errors: how to point at target + float pitch_error = asinf(clampf(to_target_norm.z, -1.0f, 1.0f)); // + = pitch up needed + float yaw_error = atan2f(to_target_norm.y, to_target_norm.x); // + = yaw right needed + + // Roll to turn: if target is right (y>0), roll right helps turn toward it + // This is the bank angle that would help turn toward target + float roll_to_turn = atan2f(to_target_norm.y, fabsf(to_target_norm.x) + 0.1f); + + // Closing rate + Vec3 rel_vel = sub3(p->vel, o->vel); + float closing_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Opponent heading relative to player + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 opp_fwd_body = quat_rotate(q_inv, opp_fwd); + float opp_heading = atan2f(opp_fwd_body.y, opp_fwd_body.x); + + int i = 0; + // Player state (11 obs) + env->observations[i++] = p->pos.x * INV_WORLD_HALF_X; + env->observations[i++] = p->pos.y * INV_WORLD_HALF_Y; + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + env->observations[i++] = norm3(p->vel) * INV_MAX_SPEED; // Speed scalar + env->observations[i++] = p->ori.w; + env->observations[i++] = p->ori.x; + env->observations[i++] = p->ori.y; + env->observations[i++] = p->ori.z; + env->observations[i++] = up.x; + env->observations[i++] = up.y; + env->observations[i++] = up.z; + + // Control errors (4 obs) - THE KEY INFO + env->observations[i++] = pitch_error / (PI * 0.5f); // -1 to 1 + env->observations[i++] = yaw_error / PI; // -1 to 1 + env->observations[i++] = roll_to_turn / (PI * 0.5f); // -1 to 1 + env->observations[i++] = clampf(dist / GUN_RANGE, 0.0f, 4.0f) - 2.0f; + + // Target info (2 obs) + env->observations[i++] = closing_rate * INV_MAX_SPEED; + env->observations[i++] = opp_heading / PI; + // OBS_SIZE = 17 +} + +// Scheme 4: Realistic cockpit instruments only +void compute_obs_realistic(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Target in body frame for gunsight + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Target apparent size (larger when closer) + float target_size = 20.0f / fmaxf(dist, 10.0f); // ~wingspan/distance + + // Opponent aspect (are they facing toward/away from us?) + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); // 1 = head-on, -1 = tail + + // Horizon visible (is up vector pointing up?) 
+ Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float horizon_visible = up.z; // 1 = level, 0 = knife-edge, -1 = inverted + + int i = 0; + // Instruments (4 obs) + env->observations[i++] = norm3(p->vel) * INV_MAX_SPEED; // Airspeed + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; // Altitude + env->observations[i++] = pitch / (PI * 0.5f); // Pitch indicator + env->observations[i++] = roll / PI; // Bank indicator + + // Gunsight (3 obs) + env->observations[i++] = target_az / PI; // Target azimuth in sight + env->observations[i++] = target_el / (PI * 0.5f); // Target elevation in sight + env->observations[i++] = clampf(target_size, 0.0f, 2.0f) - 1.0f; // Target size + + // Visual cues (3 obs) + env->observations[i++] = target_aspect; // -1 to 1 + env->observations[i++] = horizon_visible; // -1 to 1 + env->observations[i++] = clampf(dist / GUN_RANGE, 0.0f, 4.0f) - 2.0f; // Distance estimate + // OBS_SIZE = 10 +} + +// Scheme 3: REALISTIC with explicit range (10 obs) +// Like REALISTIC but with km range + closure rate instead of target_size + distance_estimate +void compute_obs_realistic_range(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Target in body frame for gunsight + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Range in km (0 = point blank, 0.5 = 1km, 1.0 = 2km+) + float range_km = clampf(dist / 2000.0f, 0.0f, 1.0f); + + // Opponent aspect (are they facing toward/away from us?) + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); // 1 = head-on, -1 = tail + + // Horizon visible (is up vector pointing up?) 
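+    // (up.z below is the cosine of the tilt away from upright: 1.0 wings-level,
+    //  about 0.5 at 60 deg of bank, 0 at knife-edge, -1 inverted.)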
+ Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float horizon_visible = up.z; // 1 = level, 0 = knife-edge, -1 = inverted + + // Closure rate (positive = closing) + Vec3 rel_vel = sub3(p->vel, o->vel); + float closure_rate = dot3(rel_vel, normalize3(rel_pos)); + + int i = 0; + // Instruments (4 obs) + env->observations[i++] = norm3(p->vel) * INV_MAX_SPEED; // Airspeed + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; // Altitude + env->observations[i++] = pitch / (PI * 0.5f); // Pitch indicator + env->observations[i++] = roll / PI; // Bank indicator + + // Gunsight (3 obs) + env->observations[i++] = target_az / PI; // Target azimuth in sight + env->observations[i++] = target_el / (PI * 0.5f); // Target elevation in sight + env->observations[i++] = range_km; // Range: 0=close, 1=2km+ + + // Visual cues (3 obs) + env->observations[i++] = target_aspect; // -1 to 1 + env->observations[i++] = horizon_visible; // -1 to 1 + env->observations[i++] = clampf(closure_rate * INV_MAX_SPEED, -1.0f, 1.0f); // Closure rate + // OBS_SIZE = 10 +} + +// Scheme 4: REALISTIC_ENEMY_STATE (13 obs) +// REALISTIC_RANGE + enemy pitch/roll/heading +void compute_obs_realistic_enemy_state(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Target in body frame for gunsight + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Range in km + float range_km = clampf(dist / 2000.0f, 0.0f, 1.0f); + + // Opponent aspect + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); + + // Horizon visible + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float horizon_visible = up.z; + + // Closure rate + Vec3 rel_vel = sub3(p->vel, o->vel); + float closure_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Enemy Euler angles (relative to horizon) + float enemy_pitch = asinf(clampf(2.0f * (o->ori.w * o->ori.y - o->ori.z * o->ori.x), -1.0f, 1.0f)); + float enemy_roll = atan2f(2.0f * (o->ori.w * o->ori.x + o->ori.y * o->ori.z), + 1.0f - 2.0f * (o->ori.x * o->ori.x + o->ori.y * o->ori.y)); + + // Enemy heading relative to player (+1 = pointing at player, -1 = pointing away) + float enemy_heading_rel = target_aspect; // Already computed as dot(opp_fwd, to_player) + + int i = 0; + // Instruments (4 obs) + env->observations[i++] = norm3(p->vel) * INV_MAX_SPEED; + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + env->observations[i++] = pitch / (PI * 0.5f); + env->observations[i++] = roll / PI; + + // Gunsight (3 obs) + env->observations[i++] = target_az / PI; + env->observations[i++] = target_el / (PI * 0.5f); + env->observations[i++] = range_km; + + // Visual cues (3 obs) + env->observations[i++] = target_aspect; + env->observations[i++] = horizon_visible; + env->observations[i++] = clampf(closure_rate * INV_MAX_SPEED, -1.0f, 1.0f); + + // Enemy state (3 obs) - NEW + env->observations[i++] = 
enemy_pitch / (PI * 0.5f); // Enemy nose angle vs horizon + env->observations[i++] = enemy_roll / PI; // Enemy bank angle vs horizon + env->observations[i++] = enemy_heading_rel; // Pointing toward/away + // OBS_SIZE = 13 +} + +// Scheme 5: REALISTIC_FULL (15 obs) +// REALISTIC_ENEMY_STATE + turn rate + G-loading +void compute_obs_realistic_full(Dogfight *env) { + Plane *p = &env->player; + Plane *o = &env->opponent; + + Quat q_inv = {p->ori.w, -p->ori.x, -p->ori.y, -p->ori.z}; + + // Player Euler angles + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); + float roll = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + + // Target in body frame for gunsight + Vec3 rel_pos = sub3(o->pos, p->pos); + Vec3 rel_pos_body = quat_rotate(q_inv, rel_pos); + float dist = norm3(rel_pos); + + float target_az = atan2f(rel_pos_body.y, rel_pos_body.x); + float r_horiz = sqrtf(rel_pos_body.x * rel_pos_body.x + rel_pos_body.y * rel_pos_body.y); + float target_el = atan2f(rel_pos_body.z, fmaxf(r_horiz, 1e-6f)); + + // Range in km + float range_km = clampf(dist / 2000.0f, 0.0f, 1.0f); + + // Opponent aspect + Vec3 opp_fwd = quat_rotate(o->ori, vec3(1, 0, 0)); + Vec3 to_player = normalize3(sub3(p->pos, o->pos)); + float target_aspect = dot3(opp_fwd, to_player); + + // Horizon visible + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); + float horizon_visible = up.z; + + // Closure rate + Vec3 rel_vel = sub3(p->vel, o->vel); + float closure_rate = dot3(rel_vel, normalize3(rel_pos)); + + // Enemy Euler angles + float enemy_pitch = asinf(clampf(2.0f * (o->ori.w * o->ori.y - o->ori.z * o->ori.x), -1.0f, 1.0f)); + float enemy_roll = atan2f(2.0f * (o->ori.w * o->ori.x + o->ori.y * o->ori.z), + 1.0f - 2.0f * (o->ori.x * o->ori.x + o->ori.y * o->ori.y)); + float enemy_heading_rel = target_aspect; + + // Turn rate from velocity change + float speed = norm3(p->vel); + float turn_rate_actual = 0.0f; + if (speed > 10.0f) { + Vec3 accel = mul3(sub3(p->vel, p->prev_vel), 1.0f / DT); + Vec3 vel_dir = mul3(p->vel, 1.0f / speed); + float accel_forward = dot3(accel, vel_dir); + Vec3 accel_centripetal = sub3(accel, mul3(vel_dir, accel_forward)); + float centripetal_mag = norm3(accel_centripetal); + turn_rate_actual = centripetal_mag / speed; // ω = a/v + } + // Normalize turn rate: max ~0.5 rad/s (29°/s) for sustained turn + float turn_rate_norm = clampf(turn_rate_actual / 0.5f, -1.0f, 1.0f); + + // G-loading: use physics-accurate p->g_force (aerodynamic forces) + // Range: -1.5 to +6.0 G, normalize so 1G = 0, 6G = 1, -1.5G = -0.5 + float g_loading_norm = clampf((p->g_force - 1.0f) / 5.0f, -0.5f, 1.0f); + + int i = 0; + // Instruments (4 obs) + env->observations[i++] = speed * INV_MAX_SPEED; + env->observations[i++] = p->pos.z * INV_WORLD_MAX_Z; + env->observations[i++] = pitch / (PI * 0.5f); + env->observations[i++] = roll / PI; + + // Gunsight (3 obs) + env->observations[i++] = target_az / PI; + env->observations[i++] = target_el / (PI * 0.5f); + env->observations[i++] = range_km; + + // Visual cues (3 obs) + env->observations[i++] = target_aspect; + env->observations[i++] = horizon_visible; + env->observations[i++] = clampf(closure_rate * INV_MAX_SPEED, -1.0f, 1.0f); + + // Enemy state (3 obs) + env->observations[i++] = enemy_pitch / (PI * 0.5f); + env->observations[i++] = enemy_roll / PI; + env->observations[i++] = enemy_heading_rel; + + // Own state (2 obs) - NEW + env->observations[i++] = turn_rate_norm; // How 
fast am I turning? + env->observations[i++] = g_loading_norm; // How hard am I pulling? + // OBS_SIZE = 15 +} + +// Dispatcher function +void compute_observations(Dogfight *env) { + switch (env->obs_scheme) { + case OBS_ANGLES: compute_obs_angles(env); break; + case OBS_CONTROL_ERROR: compute_obs_control_error(env); break; + case OBS_REALISTIC: compute_obs_realistic(env); break; + case OBS_REALISTIC_RANGE: compute_obs_realistic_range(env); break; + case OBS_REALISTIC_ENEMY_STATE: compute_obs_realistic_enemy_state(env); break; + case OBS_REALISTIC_FULL: compute_obs_realistic_full(env); break; + default: compute_obs_angles(env); break; + } +} + +// ============================================================================ +// Curriculum Learning: Stage-specific spawn functions +// ============================================================================ + +// Get current curriculum stage based on total episodes or random (for eval) +CurriculumStage get_curriculum_stage(Dogfight *env) { + if (!env->curriculum_enabled) return CURRICULUM_FULL_RANDOM; + if (env->curriculum_randomize) { + // Random stage for eval mode - tests all difficulties + return (CurriculumStage)(rand() % CURRICULUM_COUNT); + } + // Progressive stage for training + int stage_idx = env->total_episodes / env->episodes_per_stage; + if (stage_idx >= CURRICULUM_COUNT) stage_idx = CURRICULUM_COUNT - 1; + return (CurriculumStage)stage_idx; +} + +// Stage 0: TAIL_CHASE - Opponent ahead, same heading (easiest) +void spawn_tail_chase(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Opponent 200-400m directly ahead, same velocity direction + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 400), + player_pos.y + rndf(-50, 50), + player_pos.z + rndf(-30, 30) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + env->opponent_ap.mode = AP_STRAIGHT; +} + +// Stage 1: HEAD_ON - Opponent coming toward us +void spawn_head_on(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Opponent 400-600m ahead, facing us (opposite velocity) + Vec3 opp_pos = vec3( + player_pos.x + rndf(400, 600), + player_pos.y + rndf(-50, 50), + player_pos.z + rndf(-30, 30) + ); + Vec3 opp_vel = vec3(-player_vel.x, -player_vel.y, player_vel.z); + reset_plane(&env->opponent, opp_pos, opp_vel); + env->opponent_ap.mode = AP_STRAIGHT; +} + +// Stage 6: CROSSING - 45 degree deflection shots (reduced from 90° - see CURRICULUM_PLANS.md) +// 90° deflection is historically nearly impossible; 45° is achievable with proper lead +void spawn_crossing(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Opponent 300-500m to the side, flying at 45° angle (not perpendicular) + float side = rndf(0, 1) > 0.5f ? 1.0f : -1.0f; + Vec3 opp_pos = vec3( + player_pos.x + rndf(100, 200), + player_pos.y + side * rndf(300, 500), + player_pos.z + rndf(-50, 50) + ); + // 45° crossing velocity: opponent flies at 45° angle across player's path + // cos(45°) ≈ 0.707, sin(45°) ≈ 0.707 + float speed = norm3(player_vel); + float cos45 = 0.7071f; + float sin45 = 0.7071f; + // side=+1 (right): fly toward (-45°) = (cos, -sin) to cross leftward + // side=-1 (left): fly toward (+45°) = (cos, +sin) to cross rightward + Vec3 opp_vel = vec3(speed * cos45, -side * speed * sin45, 0); + reset_plane(&env->opponent, opp_pos, opp_vel); + env->opponent_ap.mode = AP_STRAIGHT; +} + +// Stage 3: VERTICAL - Above or below player +void spawn_vertical(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Opponent 200-400m ahead, 200-400m above OR below + float vert = rndf(0, 1) > 0.5f ? 
1.0f : -1.0f; + float alt_offset = vert * rndf(200, 400); + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 400), + player_pos.y + rndf(-50, 50), + clampf(player_pos.z + alt_offset, 300, 2500) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + env->opponent_ap.mode = AP_LEVEL; // Maintain altitude +} + +// Stage 4: MANEUVERING - Opponent does gentle turns (30°) +void spawn_maneuvering(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Random spawn position (similar to original) + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 500), + player_pos.y + rndf(-100, 100), + player_pos.z + rndf(-50, 50) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + // Randomly choose turn direction - gentle 30° bank + env->opponent_ap.mode = rndf(0, 1) > 0.5f ? AP_TURN_LEFT : AP_TURN_RIGHT; + env->opponent_ap.target_bank = AP_STAGE4_BANK_DEG * (M_PI / 180.0f); // 30° +} + +// Stage 5: FULL_RANDOM - Medium difficulty (360° spawn + random heading, 45° turns) +void spawn_full_random(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Random direction in 3D sphere (300-600m from player) + float dist = rndf(300, 600); + float theta = rndf(0, 2.0f * M_PI); // Azimuth: 0-360° + float phi = rndf(-0.3f, 0.3f); // Elevation: ±17° (keep near level) + + Vec3 opp_pos = vec3( + player_pos.x + dist * cosf(theta) * cosf(phi), + player_pos.y + dist * sinf(theta) * cosf(phi), + clampf(player_pos.z + dist * sinf(phi), 300, 2500) + ); + + // Random velocity direction (not necessarily toward/away from player) + float vel_theta = rndf(0, 2.0f * M_PI); + float speed = norm3(player_vel); + Vec3 opp_vel = vec3(speed * cosf(vel_theta), speed * sinf(vel_theta), 0); + + reset_plane(&env->opponent, opp_pos, opp_vel); + + // Set orientation to match velocity direction (yaw rotation around Z) + env->opponent.ori = quat_from_axis_angle(vec3(0, 0, 1), vel_theta); + + // Use autopilot randomization (if configured) + if (env->opponent_ap.randomize_on_reset) { + autopilot_randomize(&env->opponent_ap); + } else { + // Default: uniform random mode with 45° turns + float r = rndf(0, 1); + if (r < 0.2f) env->opponent_ap.mode = AP_STRAIGHT; + else if (r < 0.4f) env->opponent_ap.mode = AP_LEVEL; + else if (r < 0.6f) env->opponent_ap.mode = AP_TURN_LEFT; + else if (r < 0.8f) env->opponent_ap.mode = AP_TURN_RIGHT; + else env->opponent_ap.mode = AP_CLIMB; + } + // Set 45° bank for stage 5 turns + env->opponent_ap.target_bank = AP_STAGE5_BANK_DEG * (M_PI / 180.0f); +} + +// Stage 6: HARD_MANEUVERING - Hard turns and weave patterns +void spawn_hard_maneuvering(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 400), + player_pos.y + rndf(-100, 100), + player_pos.z + rndf(-50, 50) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + + // Pick from hard maneuver modes + float r = rndf(0, 1); + if (r < 0.3f) { + env->opponent_ap.mode = AP_HARD_TURN_LEFT; + } else if (r < 0.6f) { + env->opponent_ap.mode = AP_HARD_TURN_RIGHT; + } else { + env->opponent_ap.mode = AP_WEAVE; + env->opponent_ap.phase = rndf(0, 2.0f * M_PI); // Random start phase + } +} + +// Stage 7: EVASIVE - Opponent reacts to player position +void spawn_evasive(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + // Spawn in various positions (like FULL_RANDOM) + float dist = rndf(300, 500); + float theta = rndf(0, 2.0f * M_PI); + float phi = rndf(-0.3f, 0.3f); + + Vec3 opp_pos = vec3( + player_pos.x + dist * cosf(theta) * cosf(phi), + player_pos.y + dist * sinf(theta) * cosf(phi), + clampf(player_pos.z + dist * 
sinf(phi), 300, 2500) + ); + + float vel_theta = rndf(0, 2.0f * M_PI); + float speed = norm3(player_vel); + Vec3 opp_vel = vec3(speed * cosf(vel_theta), speed * sinf(vel_theta), 0); + + reset_plane(&env->opponent, opp_pos, opp_vel); + env->opponent.ori = quat_from_axis_angle(vec3(0, 0, 1), vel_theta); + + // Mix of hard modes with AP_EVASIVE dominant + float r = rndf(0, 1); + if (r < 0.4f) { + env->opponent_ap.mode = AP_EVASIVE; + } else if (r < 0.55f) { + env->opponent_ap.mode = AP_HARD_TURN_LEFT; + } else if (r < 0.7f) { + env->opponent_ap.mode = AP_HARD_TURN_RIGHT; + } else if (r < 0.85f) { + env->opponent_ap.mode = AP_WEAVE; + env->opponent_ap.phase = rndf(0, 2.0f * M_PI); + } else { + // 15% chance of regular turn modes (still steep 60°) + env->opponent_ap.mode = rndf(0,1) > 0.5f ? AP_TURN_LEFT : AP_TURN_RIGHT; + env->opponent_ap.target_bank = AP_STAGE6_BANK_DEG * (M_PI / 180.0f); // 60° + } +} + +// Master spawn function: dispatches to stage-specific spawner +void spawn_by_curriculum(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + CurriculumStage new_stage = get_curriculum_stage(env); + + // Log stage transitions + if (new_stage != env->stage) { + if (DEBUG > 5) printf("[Curriculum] Episode %d: Stage %d -> %d\n", + env->total_episodes, env->stage, new_stage); + env->stage = new_stage; + } + + switch (env->stage) { + case CURRICULUM_TAIL_CHASE: spawn_tail_chase(env, player_pos, player_vel); break; + case CURRICULUM_HEAD_ON: spawn_head_on(env, player_pos, player_vel); break; + case CURRICULUM_CROSSING: spawn_crossing(env, player_pos, player_vel); break; + case CURRICULUM_VERTICAL: spawn_vertical(env, player_pos, player_vel); break; + case CURRICULUM_MANEUVERING: spawn_maneuvering(env, player_pos, player_vel); break; + case CURRICULUM_FULL_RANDOM: spawn_full_random(env, player_pos, player_vel); break; + case CURRICULUM_HARD_MANEUVERING: spawn_hard_maneuvering(env, player_pos, player_vel); break; + case CURRICULUM_EVASIVE: + default: spawn_evasive(env, player_pos, player_vel); break; + } + + // Reset autopilot PID state after spawning + env->opponent_ap.prev_vz = 0.0f; + env->opponent_ap.prev_bank_error = 0.0f; +} + +// Legacy spawn (for curriculum_enabled=0) +void spawn_legacy(Dogfight *env, Vec3 player_pos, Vec3 player_vel) { + Vec3 opp_pos = vec3( + player_pos.x + rndf(200, 500), + player_pos.y + rndf(-100, 100), + player_pos.z + rndf(-50, 50) + ); + reset_plane(&env->opponent, opp_pos, player_vel); + + // Handle autopilot: randomize if configured, reset PID state + if (env->opponent_ap.randomize_on_reset) { + autopilot_randomize(&env->opponent_ap); + } + env->opponent_ap.prev_vz = 0.0f; + env->opponent_ap.prev_bank_error = 0.0f; +} + +// ============================================================================ + +void c_reset(Dogfight *env) { + // Increment total episodes BEFORE determining stage (so first episode is 0) + env->total_episodes++; + + env->tick = 0; + env->episode_return = 0.0f; + + // Clear episode tracking + env->kill = 0; + env->episode_shots_fired = 0.0f; + env->total_aileron_usage = 0.0f; + env->aileron_bias = 0.0f; + env->prev_dist = 0.0f; + + // Reset reward accumulators + env->sum_r_approach = 0.0f; + env->sum_r_closing = 0.0f; + env->sum_r_tail = 0.0f; + env->sum_r_speed = 0.0f; + env->sum_r_roll = 0.0f; + env->sum_r_neg_g = 0.0f; + env->sum_r_rudder = 0.0f; + env->sum_r_aileron = 0.0f; + env->sum_r_bias = 0.0f; + env->sum_r_level = 0.0f; + env->sum_r_aim = 0.0f; + env->death_reason = DEATH_NONE; + + // Reset aiming diagnostics + env->best_aim_angle = 
M_PI; // Start at worst (180°) + env->ticks_in_cone = 0; + env->closest_dist = 10000.0f; // Start at max + + // Reset flight envelope diagnostics + env->max_g = 1.0f; // Start at 1G (level flight) + env->min_g = 1.0f; + env->max_bank = 0.0f; + env->max_pitch = 0.0f; + env->min_speed = 10000.0f; // Start at max + env->max_speed = 0.0f; + env->min_alt = 10000.0f; // Start at max + env->max_alt = 0.0f; + env->sum_throttle = 0.0f; + env->trigger_pulls = 0; + env->prev_trigger = 0; + + // Gun cone for hit detection - stays fixed at 5° + env->cos_gun_cone = cosf(env->gun_cone_angle); + env->cos_gun_cone_2x = cosf(env->gun_cone_angle * 2.0f); + + // Anneal reward cone: start large (easy), shrink to gun cone (hard) + float anneal_frac = fminf((float)env->total_episodes / (float)env->aim_anneal_episodes, 1.0f); + env->reward_cone_angle = env->aim_cone_start + anneal_frac * (env->aim_cone_end - env->aim_cone_start); + env->cos_reward_cone = cosf(env->reward_cone_angle); + env->cos_reward_cone_2x = cosf(env->reward_cone_angle * 2.0f); + + // Spawn player at random position + Vec3 pos = vec3(rndf(-500, 500), rndf(-500, 500), rndf(500, 1500)); + Vec3 vel = vec3(80, 0, 0); + reset_plane(&env->player, pos, vel); + + // Spawn opponent based on curriculum stage (or legacy if disabled) + if (env->curriculum_enabled) { + spawn_by_curriculum(env, pos, vel); + } else { + spawn_legacy(env, pos, vel); + } + + if (DEBUG >= 10) printf("=== RESET ===\n"); + if (DEBUG >= 10) printf("kill=%d, episode_shots_fired=%.0f (now cleared)\n", env->kill, env->episode_shots_fired); + if (DEBUG >= 10) printf("player_pos=(%.1f, %.1f, %.1f)\n", pos.x, pos.y, pos.z); + if (DEBUG >= 10) printf("player_vel=(%.1f, %.1f, %.1f) speed=%.1f\n", vel.x, vel.y, vel.z, norm3(vel)); + if (DEBUG >= 10) printf("opponent_pos=(%.1f, %.1f, %.1f)\n", env->opponent.pos.x, env->opponent.pos.y, env->opponent.pos.z); + if (DEBUG >= 10) printf("initial_dist=%.1f m, stage=%d\n", norm3(sub3(env->opponent.pos, pos)), env->stage); + + compute_observations(env); +} + +// Check if shooter hits target (cone-based hit detection) +bool check_hit(Plane *shooter, Plane *target, float cos_gun_cone) { + Vec3 to_target = sub3(target->pos, shooter->pos); + float dist = norm3(to_target); + if (dist > GUN_RANGE) return false; + if (dist < 1.0f) return false; // Too close (avoid division issues) + + Vec3 forward = quat_rotate(shooter->ori, vec3(1, 0, 0)); + Vec3 to_target_norm = normalize3(to_target); + float cos_angle = dot3(to_target_norm, forward); + return cos_angle > cos_gun_cone; +} + +// Respawn opponent at random position ahead of player +void respawn_opponent(Dogfight *env) { + Plane *p = &env->player; + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + + // Spawn 300-600m ahead, with some lateral offset + Vec3 opp_pos = vec3( + p->pos.x + fwd.x * rndf(300, 600) + rndf(-100, 100), + p->pos.y + fwd.y * rndf(300, 600) + rndf(-100, 100), + clampf(p->pos.z + rndf(-100, 100), 200, 2500) + ); + Vec3 vel = vec3(80, 0, 0); + reset_plane(&env->opponent, opp_pos, vel); + + // Reset autopilot PID state on respawn + env->opponent_ap.prev_vz = 0.0f; + env->opponent_ap.prev_bank_error = 0.0f; + + if (DEBUG >= 10) printf("=== RESPAWN ===\n"); + if (DEBUG >= 10) printf("player_pos=(%.1f, %.1f, %.1f)\n", p->pos.x, p->pos.y, p->pos.z); + if (DEBUG >= 10) printf("player_fwd=(%.2f, %.2f, %.2f)\n", fwd.x, fwd.y, fwd.z); + if (DEBUG >= 10) printf("new_opponent_pos=(%.1f, %.1f, %.1f)\n", opp_pos.x, opp_pos.y, opp_pos.z); + if (DEBUG >= 10) printf("opponent_vel=(%.1f, %.1f, %.1f) NOTE: 
always +X!\n", vel.x, vel.y, vel.z); + if (DEBUG >= 10) printf("respawn_dist=%.1f m\n", norm3(sub3(opp_pos, p->pos))); +} + +void c_step(Dogfight *env) { + env->tick++; + env->rewards[0] = 0.0f; + env->terminals[0] = 0; + + if (DEBUG >= 10) printf("\n========== TICK %d ==========\n", env->tick); + if (DEBUG >= 10) printf("=== ACTIONS ===\n"); + if (DEBUG >= 10) printf("throttle_raw=%.3f -> throttle=%.3f\n", env->actions[0], (env->actions[0] + 1.0f) * 0.5f); + if (DEBUG >= 10) printf("elevator=%.3f -> pitch_rate=%.3f rad/s\n", env->actions[1], env->actions[1] * MAX_PITCH_RATE); + if (DEBUG >= 10) printf("ailerons=%.3f -> roll_rate=%.3f rad/s\n", env->actions[2], env->actions[2] * MAX_ROLL_RATE); + if (DEBUG >= 10) printf("rudder=%.3f -> yaw_rate=%.3f rad/s\n", env->actions[3], env->actions[3] * MAX_YAW_RATE); + if (DEBUG >= 10) printf("trigger=%.3f (fires if >0.5)\n", env->actions[4]); + + // Player uses full physics with actions + step_plane_with_physics(&env->player, env->actions, DT); + + // Opponent uses autopilot (if not AP_STRAIGHT, uses full physics) + if (env->opponent_ap.mode != AP_STRAIGHT) { + float opp_actions[5]; + env->opponent_ap.threat_pos = env->player.pos; // For AP_EVASIVE mode + autopilot_step(&env->opponent_ap, &env->opponent, opp_actions, DT); + step_plane_with_physics(&env->opponent, opp_actions, DT); + } else { + step_plane(&env->opponent, DT); + } + + // Track aileron usage for monitoring (no death penalty - see BISECTION.md) + env->total_aileron_usage += fabsf(env->actions[2]); + +#if DEBUG >= 3 + // Track flight envelope diagnostics (only when debugging - expensive) + { + Plane *dbg_p = &env->player; + if (dbg_p->g_force > env->max_g) env->max_g = dbg_p->g_force; + if (dbg_p->g_force < env->min_g) env->min_g = dbg_p->g_force; + float speed = norm3(dbg_p->vel); + if (speed < env->min_speed) env->min_speed = speed; + if (speed > env->max_speed) env->max_speed = speed; + if (dbg_p->pos.z < env->min_alt) env->min_alt = dbg_p->pos.z; + if (dbg_p->pos.z > env->max_alt) env->max_alt = dbg_p->pos.z; + // Bank angle from quaternion + float bank = atan2f(2.0f * (dbg_p->ori.w * dbg_p->ori.x + dbg_p->ori.y * dbg_p->ori.z), + 1.0f - 2.0f * (dbg_p->ori.x * dbg_p->ori.x + dbg_p->ori.y * dbg_p->ori.y)); + if (fabsf(bank) > env->max_bank) env->max_bank = fabsf(bank); + // Pitch angle from quaternion + float pitch = asinf(clampf(2.0f * (dbg_p->ori.w * dbg_p->ori.y - dbg_p->ori.z * dbg_p->ori.x), -1.0f, 1.0f)); + if (fabsf(pitch) > env->max_pitch) env->max_pitch = fabsf(pitch); + // Throttle accumulator + env->sum_throttle += dbg_p->throttle; + // Trigger pull edge detection + int trigger_now = (env->actions[4] > 0.5f) ? 1 : 0; + if (trigger_now && !env->prev_trigger) env->trigger_pulls++; + env->prev_trigger = trigger_now; + } +#endif + + // === Combat (Phase 5) === + Plane *p = &env->player; + Plane *o = &env->opponent; + float reward = 0.0f; + + // Decrement fire cooldowns + if (p->fire_cooldown > 0) p->fire_cooldown--; + if (o->fire_cooldown > 0) o->fire_cooldown--; + + // Player fires: action[4] > 0.5 and cooldown ready + if (DEBUG >= 10) printf("trigger=%.3f, cooldown=%d\n", env->actions[4], p->fire_cooldown); + if (env->actions[4] > 0.5f && p->fire_cooldown == 0) { + p->fire_cooldown = FIRE_COOLDOWN; + env->episode_shots_fired += 1.0f; + if (DEBUG >= 10) printf("=== FIRED! episode_shots_fired=%.0f ===\n", env->episode_shots_fired); + + // Check if hit = kill = SUCCESS = terminal + if (check_hit(p, o, env->cos_gun_cone)) { + if (DEBUG >= 10) printf("*** KILL! 
***\n"); + env->kill = 1; + env->death_reason = DEATH_KILL; + env->rewards[0] = 1.0f; + env->episode_return += 1.0f; + env->terminals[0] = 1; + add_log(env); + c_reset(env); + return; + } else { + if (DEBUG >= 10) printf("MISS (dist=%.1f, in_cone=%d)\n", norm3(sub3(o->pos, p->pos)), + check_hit(p, o, env->cos_gun_cone)); + } + } + + // === Reward Shaping (all values from rcfg, sweepable) === + Vec3 rel_pos = sub3(o->pos, p->pos); + float dist = norm3(rel_pos); + + // 1. Approach reward: getting closer = good (asymmetric - no penalty for moving away) + float r_approach = 0.0f; + if (env->prev_dist > 0.0f) { + float dist_delta = env->prev_dist - dist; // positive when closing + r_approach = fmaxf(0.0f, dist_delta) * env->rcfg.approach; // Only reward closing + } + env->prev_dist = dist; + reward += r_approach; + + // 3. Closing velocity reward: approaching = good (asymmetric - no penalty for opening) + Vec3 rel_vel = sub3(p->vel, o->vel); + Vec3 rel_pos_norm = normalize3(rel_pos); + float closing_rate = dot3(rel_vel, rel_pos_norm); + float r_closing = fmaxf(0.0f, closing_rate) * env->rcfg.closing_scale; // Only reward closing, don't punish opening + reward += r_closing; + + // 3. Tail position reward: behind opponent = good + Vec3 opp_forward = quat_rotate(o->ori, vec3(1, 0, 0)); + float tail_angle = dot3(rel_pos_norm, opp_forward); + float r_tail = tail_angle * env->rcfg.tail_scale; + reward += r_tail; + + // 4. Speed penalty: too slow is stall risk + float speed = norm3(p->vel); + float r_speed = 0.0f; + if (speed < env->rcfg.speed_min) { + r_speed = -(env->rcfg.speed_min - speed) * env->rcfg.stall; + } + reward += r_speed; + + // 6. Roll penalty: gentle preference for level flight + float roll_angle = atan2f(2.0f * (p->ori.w * p->ori.x + p->ori.y * p->ori.z), + 1.0f - 2.0f * (p->ori.x * p->ori.x + p->ori.y * p->ori.y)); + float r_roll = -fabsf(roll_angle) * env->rcfg.roll; + reward += r_roll; + + // 7. Negative G penalty: only penalize actual negative G (below -0.5G) + // Threshold -0.5G: allows normal flight and light negative G, penalizes hard negative G + float g_threshold = -0.5f; + float g_deficit = fmaxf(0.0f, g_threshold - p->g_force); // positive when g < -0.5 + float r_neg_g = -g_deficit * env->rcfg.neg_g; + reward += r_neg_g; + + // 8. Rudder penalty: discourage excessive rudder use + float r_rudder = -fabsf(env->actions[3]) * env->rcfg.rudder; + reward += r_rudder; + + // Track aileron bias for monitoring (no reward penalty - see BISECTION.md) + env->aileron_bias += env->actions[2]; + float r_aileron = 0.0f; // Disabled - was causing "don't maneuver" trap + float r_bias = 0.0f; // Disabled - was causing "don't maneuver" trap + float r_level = 0.0f; // Disabled - was causing "don't maneuver" trap + float pitch = asinf(clampf(2.0f * (p->ori.w * p->ori.y - p->ori.z * p->ori.x), -1.0f, 1.0f)); // For debug only + + // 9. 
Aiming reward: feedback for gun alignment before actual hits + Vec3 player_fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + Vec3 to_opp_norm = normalize3(rel_pos); + float aim_dot = dot3(to_opp_norm, player_fwd); // 1.0 = perfect aim + float aim_angle_deg = acosf(clampf(aim_dot, -1.0f, 1.0f)) * RAD_TO_DEG; + + float r_aim = 0.0f; + // Aiming rewards are ADDITIVE - tight aim gets BOTH tracking + firing_solution + // Uses annealing reward cone (starts large, shrinks to gun cone) + if (dist < GUN_RANGE) { + if (aim_dot > env->cos_reward_cone_2x) { + // Loose tracking (within 2x reward cone) - base reward + r_aim += env->rcfg.tracking; + } + if (aim_dot > env->cos_reward_cone) { + // Tight aim (within 1x reward cone) - bonus reward + r_aim += env->rcfg.firing_solution; + } + } + reward += r_aim; + +#if DEBUG >= 2 + // Track aiming diagnostics (only when debugging - acosf is expensive) + { + float aim_angle_rad = acosf(clampf(aim_dot, -1.0f, 1.0f)); + if (aim_angle_rad < env->best_aim_angle) env->best_aim_angle = aim_angle_rad; + if (aim_dot > env->cos_reward_cone) env->ticks_in_cone++; + if (dist < env->closest_dist) env->closest_dist = dist; + } +#endif + + // Accumulate for episode summary + env->sum_r_approach += r_approach; + env->sum_r_closing += r_closing; + env->sum_r_tail += r_tail; + env->sum_r_speed += r_speed; + env->sum_r_roll += r_roll; + env->sum_r_neg_g += r_neg_g; + env->sum_r_rudder += r_rudder; + env->sum_r_aileron += r_aileron; + env->sum_r_bias += r_bias; + env->sum_r_level += r_level; + env->sum_r_aim += r_aim; + + if (DEBUG >= 4 && env->env_num == 0) printf("=== REWARD ===\n"); + if (DEBUG >= 4 && env->env_num == 0) printf("r_approach=%.5f (dist=%.1f m)\n", r_approach, dist); + if (DEBUG >= 4 && env->env_num == 0) printf("r_closing=%.4f (rate=%.1f m/s)\n", r_closing, closing_rate); + if (DEBUG >= 4 && env->env_num == 0) printf("r_tail=%.4f (angle=%.2f)\n", r_tail, tail_angle); + if (DEBUG >= 4 && env->env_num == 0) printf("r_speed=%.4f (speed=%.1f)\n", r_speed, speed); + if (DEBUG >= 4 && env->env_num == 0) printf("r_roll=%.5f (roll=%.1f deg)\n", r_roll, roll_angle * RAD_TO_DEG); + if (DEBUG >= 4 && env->env_num == 0) printf("r_neg_g=%.5f (g=%.2f)\n", r_neg_g, p->g_force); + if (DEBUG >= 4 && env->env_num == 0) printf("r_rudder=%.5f (rud=%.2f)\n", r_rudder, env->actions[3]); + if (DEBUG >= 4 && env->env_num == 0) printf("r_aileron=%.5f (ail=%.2f)\n", r_aileron, env->actions[2]); + if (DEBUG >= 4 && env->env_num == 0) printf("r_bias=%.5f (bias=%.1f)\n", r_bias, env->aileron_bias); + if (DEBUG >= 4 && env->env_num == 0) printf("r_level=%.4f (bank=%.1f°, pitch=%.1f°)\n", r_level, roll_angle * RAD_TO_DEG, pitch * RAD_TO_DEG); + if (DEBUG >= 4 && env->env_num == 0) printf("r_aim=%.4f (aim_angle=%.1f deg, dist=%.1f)\n", r_aim, aim_angle_deg, dist); + if (DEBUG >= 4 && env->env_num == 0) printf("reward_total=%.4f\n", reward); + + if (DEBUG >= 10) printf("=== COMBAT ===\n"); + if (DEBUG >= 10) printf("aim_angle=%.1f deg (cone=5 deg)\n", aim_angle_deg); + if (DEBUG >= 10) printf("dist_to_target=%.1f m (gun_range=500)\n", dist); + if (DEBUG >= 10) printf("in_cone=%d, in_range=%d\n", aim_dot > env->cos_gun_cone, dist < GUN_RANGE); + + // Clamp reward to prevent extreme values causing gradient explosion + reward = fmaxf(-1.0f, fminf(1.0f, reward)); + env->rewards[0] = reward; + env->episode_return += reward; + + // Check bounds (player only) + bool oob = fabsf(p->pos.x) > WORLD_HALF_X || + fabsf(p->pos.y) > WORLD_HALF_Y || + p->pos.z < 0 || p->pos.z > WORLD_MAX_Z; + + // Check for 
supersonic (physics blowup) - 340 m/s = Mach 1 + float player_speed = norm3(p->vel); + float opp_speed = norm3(o->vel); + bool supersonic = player_speed > 340.0f || opp_speed > 340.0f; + if (DEBUG && supersonic) { + printf("=== SUPERSONIC BLOWUP ===\n"); + printf("player_speed=%.1f, opp_speed=%.1f\n", player_speed, opp_speed); + printf("player_vel=(%.1f, %.1f, %.1f)\n", p->vel.x, p->vel.y, p->vel.z); + printf("opp_vel=(%.1f, %.1f, %.1f)\n", o->vel.x, o->vel.y, o->vel.z); + printf("opp_ap_mode=%d\n", env->opponent_ap.mode); + } + + if (oob || env->tick >= env->max_steps || supersonic) { + if (DEBUG >= 10) printf("=== TERMINAL (FAILURE) ===\n"); + if (DEBUG >= 10) printf("oob=%d, supersonic=%d, tick=%d/%d\n", oob, supersonic, env->tick, env->max_steps); + // Track death reason (priority: supersonic > oob > timeout) + if (supersonic) { + env->death_reason = DEATH_SUPERSONIC; + } else if (oob) { + env->death_reason = DEATH_OOB; + } else { + env->death_reason = DEATH_TIMEOUT; + } + env->rewards[0] = (supersonic || p->pos.z <= 0) ? -1.0f : 0.0f; + env->terminals[0] = 1; + add_log(env); + c_reset(env); + return; + } + + compute_observations(env); +} + +// Forward declaration for c_close (used in c_render) +void c_close(Dogfight *env); + +// Draw airplane shape using lines - shows roll/pitch/yaw clearly +// Body frame: X=forward, Y=right, Z=up +void draw_plane_shape(Vec3 pos, Quat ori, Color body_color, Color wing_color) { + // Body frame points (scaled for visibility: ~20m wingspan, ~25m length) + Vec3 nose = vec3(15, 0, 0); + Vec3 tail = vec3(-10, 0, 0); + Vec3 left_wing = vec3(0, -12, 0); + Vec3 right_wing = vec3(0, 12, 0); + Vec3 vtail_top = vec3(-8, 0, 8); // Vertical stabilizer + Vec3 htail_left = vec3(-10, -5, 0); // Horizontal stabilizer + Vec3 htail_right = vec3(-10, 5, 0); + + // Rotate all points by orientation and translate to world position + Vec3 nose_w = add3(pos, quat_rotate(ori, nose)); + Vec3 tail_w = add3(pos, quat_rotate(ori, tail)); + Vec3 lwing_w = add3(pos, quat_rotate(ori, left_wing)); + Vec3 rwing_w = add3(pos, quat_rotate(ori, right_wing)); + Vec3 vtop_w = add3(pos, quat_rotate(ori, vtail_top)); + Vec3 htl_w = add3(pos, quat_rotate(ori, htail_left)); + Vec3 htr_w = add3(pos, quat_rotate(ori, htail_right)); + + // Convert to Raylib Vector3 + Vector3 nose_r = {nose_w.x, nose_w.y, nose_w.z}; + Vector3 tail_r = {tail_w.x, tail_w.y, tail_w.z}; + Vector3 lwing_r = {lwing_w.x, lwing_w.y, lwing_w.z}; + Vector3 rwing_r = {rwing_w.x, rwing_w.y, rwing_w.z}; + Vector3 vtop_r = {vtop_w.x, vtop_w.y, vtop_w.z}; + Vector3 htl_r = {htl_w.x, htl_w.y, htl_w.z}; + Vector3 htr_r = {htr_w.x, htr_w.y, htr_w.z}; + + // Fuselage (nose to tail) + DrawLine3D(nose_r, tail_r, body_color); + + // Main wings (left to right, through center for visibility) + DrawLine3D(lwing_r, rwing_r, wing_color); + // Wing to fuselage connections (makes it look more solid) + DrawLine3D(lwing_r, nose_r, wing_color); + DrawLine3D(rwing_r, nose_r, wing_color); + + // Vertical stabilizer (tail to top) + DrawLine3D(tail_r, vtop_r, body_color); + + // Horizontal stabilizer + DrawLine3D(htl_r, htr_r, body_color); + DrawLine3D(htl_r, tail_r, body_color); + DrawLine3D(htr_r, tail_r, body_color); + + // Small sphere at nose to show front clearly + DrawSphere(nose_r, 2.0f, body_color); +} + +void handle_camera_controls(Client *c) { + Vector2 mouse = GetMousePosition(); + + if (IsMouseButtonPressed(MOUSE_BUTTON_LEFT)) { + c->is_dragging = true; + c->last_mouse_x = mouse.x; + c->last_mouse_y = mouse.y; + } + if 
(IsMouseButtonReleased(MOUSE_BUTTON_LEFT)) { + c->is_dragging = false; + } + + if (c->is_dragging) { + float sensitivity = 0.005f; + c->cam_azimuth -= (mouse.x - c->last_mouse_x) * sensitivity; + c->cam_elevation += (mouse.y - c->last_mouse_y) * sensitivity; + c->cam_elevation = clampf(c->cam_elevation, -1.4f, 1.4f); // prevent gimbal lock + c->last_mouse_x = mouse.x; + c->last_mouse_y = mouse.y; + } + + // Mouse wheel zoom + float wheel = GetMouseWheelMove(); + if (wheel != 0) { + c->cam_distance = clampf(c->cam_distance - wheel * 10.0f, 30.0f, 300.0f); + } +} + +void c_render(Dogfight *env) { + // 1. Lazy initialization + if (env->client == NULL) { + env->client = (Client *)calloc(1, sizeof(Client)); + env->client->width = 1280; + env->client->height = 720; + env->client->cam_distance = 80.0f; + env->client->cam_azimuth = 0.0f; + env->client->cam_elevation = 0.3f; + env->client->is_dragging = false; + + InitWindow(1280, 720, "Dogfight"); + SetTargetFPS(60); + + // Z-up coordinate system + env->client->camera.up = (Vector3){0.0f, 0.0f, 1.0f}; + env->client->camera.fovy = 45.0f; + env->client->camera.projection = CAMERA_PERSPECTIVE; + } + + // 2. Handle window close + if (WindowShouldClose() || IsKeyDown(KEY_ESCAPE)) { + c_close(env); + exit(0); + } + + // 3. Handle mouse controls for camera orbit + handle_camera_controls(env->client); + + // 4. Update chase camera + Plane *p = &env->player; + Vec3 fwd = quat_rotate(p->ori, vec3(1, 0, 0)); + float dist = env->client->cam_distance; + + // Apply orbit offsets from mouse drag + float az = env->client->cam_azimuth; + float el = env->client->cam_elevation; + + // Base chase position (behind and above player) + float cam_x = p->pos.x - fwd.x * dist * cosf(el) * cosf(az) + fwd.y * dist * sinf(az); + float cam_y = p->pos.y - fwd.y * dist * cosf(el) * cosf(az) - fwd.x * dist * sinf(az); + float cam_z = p->pos.z + dist * sinf(el) + 20.0f; + + env->client->camera.position = (Vector3){cam_x, cam_y, cam_z}; + env->client->camera.target = (Vector3){p->pos.x, p->pos.y, p->pos.z}; + + // 5. Begin drawing + BeginDrawing(); + ClearBackground((Color){6, 24, 24, 255}); // Dark blue-green sky + + // Set clip planes for long-range visibility (default far=1000 is too close) + rlSetClipPlanes(1.0, 10000.0); // near=1m, far=10km + BeginMode3D(env->client->camera); + + // 6. Draw ground plane at z=0 (XY plane, since we use Z-up) + // DrawPlane uses raylib's Y-up convention (XZ plane), so we draw triangles instead + Vector3 g1 = {-2000, -2000, 0}; + Vector3 g2 = {2000, -2000, 0}; + Vector3 g3 = {2000, 2000, 0}; + Vector3 g4 = {-2000, 2000, 0}; + Color ground_color = (Color){20, 60, 20, 255}; + DrawTriangle3D(g1, g2, g3, ground_color); + DrawTriangle3D(g1, g3, g4, ground_color); + + // 7. Draw world bounds wireframe + // Bounds: X +/-2000, Y +/-2000, Z 0-3000 -> center at (0, 0, 1500) + DrawCubeWires((Vector3){0, 0, 1500}, 4000, 4000, 3000, (Color){100, 100, 100, 255}); + + // 8. Draw player plane (cyan wireframe airplane) + Color cyan = {0, 255, 255, 255}; + Color light_cyan = {100, 255, 255, 255}; + draw_plane_shape(p->pos, p->ori, cyan, light_cyan); + + // 9. Draw opponent plane (red wireframe airplane) + Plane *o = &env->opponent; + draw_plane_shape(o->pos, o->ori, RED, ORANGE); + + // 10. 
Draw tracer when firing (cooldown just set = just fired) + if (p->fire_cooldown >= FIRE_COOLDOWN - 2) { // Show for 2 frames + Vec3 nose = add3(p->pos, quat_rotate(p->ori, vec3(15, 0, 0))); + Vec3 tracer_end = add3(p->pos, quat_rotate(p->ori, vec3(GUN_RANGE, 0, 0))); + Vector3 nose_r = {nose.x, nose.y, nose.z}; + Vector3 end_r = {tracer_end.x, tracer_end.y, tracer_end.z}; + DrawLine3D(nose_r, end_r, YELLOW); + } + + EndMode3D(); + + // 10. Draw HUD + float speed = norm3(p->vel); + float dist_to_opp = norm3(sub3(o->pos, p->pos)); + + DrawText(TextFormat("Speed: %.0f m/s", speed), 10, 10, 20, WHITE); + DrawText(TextFormat("Altitude: %.0f m", p->pos.z), 10, 40, 20, WHITE); + DrawText(TextFormat("Throttle: %.0f%%", p->throttle * 100.0f), 10, 70, 20, WHITE); + DrawText(TextFormat("Distance: %.0f m", dist_to_opp), 10, 100, 20, WHITE); + DrawText(TextFormat("Tick: %d / %d", env->tick, env->max_steps), 10, 130, 20, WHITE); + DrawText(TextFormat("Return: %.2f", env->episode_return), 10, 160, 20, WHITE); + DrawText(TextFormat("Perf: %.1f%% | Shots: %.0f", env->log.perf / fmaxf(env->log.n, 1.0f) * 100.0f, env->log.shots_fired), 10, 190, 20, YELLOW); + + // Controls hint + DrawText("Mouse drag: Orbit | Scroll: Zoom | ESC: Exit", 10, (int)env->client->height - 30, 16, GRAY); + + EndDrawing(); +} + +void c_close(Dogfight *env) { + if (env->client != NULL) { + CloseWindow(); + free(env->client); + env->client = NULL; + } +} + +// Force exact game state for testing. Defaults shown in comments are applied in Python. +void force_state( + Dogfight *env, + float p_px, // = 0.0f, player pos X + float p_py, // = 0.0f, player pos Y + float p_pz, // = 1000.0f, player pos Z + float p_vx, // = 150.0f, player vel X (m/s) + float p_vy, // = 0.0f, player vel Y + float p_vz, // = 0.0f, player vel Z + float p_ow, // = 1.0f, player orientation quat W + float p_ox, // = 0.0f, player orientation quat X + float p_oy, // = 0.0f, player orientation quat Y + float p_oz, // = 0.0f, player orientation quat Z + float p_throttle, // = 1.0f, player throttle [0,1] + float o_px, // = -9999.0f (auto: 400m ahead), opponent pos X + float o_py, // = -9999.0f (auto), opponent pos Y + float o_pz, // = -9999.0f (auto), opponent pos Z + float o_vx, // = -9999.0f (auto: match player), opponent vel X + float o_vy, // = -9999.0f (auto), opponent vel Y + float o_vz, // = -9999.0f (auto), opponent vel Z + float o_ow, // = -9999.0f (auto: match player), opponent ori W + float o_ox, // = -9999.0f (auto), opponent ori X + float o_oy, // = -9999.0f (auto), opponent ori Y + float o_oz, // = -9999.0f (auto), opponent ori Z + int tick // = 0, environment tick +) { + // Player state + env->player.pos = vec3(p_px, p_py, p_pz); + env->player.vel = vec3(p_vx, p_vy, p_vz); + env->player.ori = quat(p_ow, p_ox, p_oy, p_oz); + quat_normalize(&env->player.ori); + env->player.throttle = p_throttle; + env->player.fire_cooldown = 0; + + // Opponent position: auto = 400m ahead of player + if (o_px < -9000.0f) { + Vec3 fwd = quat_rotate(env->player.ori, vec3(1, 0, 0)); + env->opponent.pos = add3(env->player.pos, mul3(fwd, 400.0f)); + } else { + env->opponent.pos = vec3(o_px, o_py, o_pz); + } + + // Opponent velocity: auto = match player + if (o_vx < -9000.0f) { + env->opponent.vel = env->player.vel; + } else { + env->opponent.vel = vec3(o_vx, o_vy, o_vz); + } + + // Opponent orientation: auto = match player + if (o_ow < -9000.0f) { + env->opponent.ori = env->player.ori; + } else { + env->opponent.ori = quat(o_ow, o_ox, o_oy, o_oz); + 
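// normalize defensively, mirroring the player branch above, in case the caller passes a non-unit quaternion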
quat_normalize(&env->opponent.ori); + } + env->opponent.fire_cooldown = 0; + + // Environment state + env->tick = tick; + env->episode_return = 0.0f; + + compute_observations(env); +} diff --git a/pufferlib/ocean/dogfight/dogfight.py b/pufferlib/ocean/dogfight/dogfight.py new file mode 100644 index 000000000..1bf492e54 --- /dev/null +++ b/pufferlib/ocean/dogfight/dogfight.py @@ -0,0 +1,326 @@ +import time +import numpy as np +import gymnasium + +import pufferlib +from pufferlib.ocean.dogfight import binding + + +# Autopilot mode constants (must match autopilot.h enum) +class AutopilotMode: + STRAIGHT = 0 # Fly straight (current/default behavior) + LEVEL = 1 # Level flight with PD on vz + TURN_LEFT = 2 # Coordinated left turn + TURN_RIGHT = 3 # Coordinated right turn + CLIMB = 4 # Constant climb rate + DESCEND = 5 # Constant descent rate + RANDOM = 6 # Random mode selection at reset + + +# Observation sizes by scheme (must match C OBS_SIZES in dogfight.h) +OBS_SIZES = { + 0: 12, # ANGLES: pos(3) + speed(1) + euler(3) + target_angles(4) + opp(1) + 1: 17, # CONTROL_ERROR: player(11) + control_errors(4) + target(2) + 2: 10, # REALISTIC: instruments(4) + gunsight(3) + visual(3) + 3: 10, # REALISTIC_RANGE: instruments(4) + gunsight(3) + visual(3) w/ km range + 4: 13, # REALISTIC_ENEMY_STATE: + enemy pitch/roll/heading + 5: 15, # REALISTIC_FULL: + turn rate + G-loading +} + + +class Dogfight(pufferlib.PufferEnv): + def __init__( + self, + num_envs=16, + render_mode=None, + render_fps=None, # Target FPS when rendering (None=no delay, 50=real-time, 10=slow-mo) + report_interval=1, + buf=None, + seed=42, + max_steps=3000, + obs_scheme=0, + # Curriculum learning + curriculum_enabled=0, # 0=off (legacy), 1=on (progressive stages) + curriculum_randomize=0, # 0=progressive (training), 1=random stage each episode (eval) + episodes_per_stage=60, # Episodes before advancing difficulty + # Reward weights (all sweepable via INI) + reward_closing_scale=0.002, + reward_tail_scale=0.005, + reward_tracking=0.05, + reward_firing_solution=0.1, + penalty_stall=0.002, + penalty_roll=0.0001, + penalty_neg_g=0.002, + penalty_rudder=0.0002, + penalty_aileron=0.015, + penalty_bias=0.01, + reward_approach=0.005, + reward_level=0.02, + # Thresholds (not swept) + alt_max=2500.0, + speed_min=50.0, + # Aim cone annealing (reward shaping curriculum) + aim_cone_start=0.35, # Starting reward cone (radians, ~20°) + aim_cone_end=0.087, # Ending reward cone (radians, ~5°) + aim_anneal_episodes=50000, # Episodes to fully anneal + ): + # Observation size depends on scheme + obs_size = OBS_SIZES.get(obs_scheme, 19) + self.obs_scheme = obs_scheme + self.single_observation_space = gymnasium.spaces.Box( + low=-1, + high=1, + shape=(obs_size,), + dtype=np.float32, + ) + + # Action: Box(5) continuous [-1, 1] + # [0] throttle, [1] elevator, [2] ailerons, [3] rudder, [4] trigger + self.single_action_space = gymnasium.spaces.Box( + low=-1, high=1, shape=(5,), dtype=np.float32 + ) + + self.num_agents = num_envs + self.render_mode = render_mode + self.render_fps = render_fps + self.report_interval = report_interval + self.tick = 0 + + super().__init__(buf) + self.actions = self.actions.astype(np.float32) # REQUIRED for continuous + + # Print hyperparameters at init (for sweep debugging) + print(f"=== DOGFIGHT ENV INIT ===") + print(f" obs_scheme={obs_scheme}, num_envs={num_envs}") + print(f" REWARDS: tail={reward_tail_scale:.4f} track={reward_tracking:.4f} fire={reward_firing_solution:.4f}") + print(f" approach={reward_approach:.4f} 
level={reward_level:.4f} closing={reward_closing_scale:.4f}") + print(f" PENALTY: bias={penalty_bias:.4f} ail={penalty_aileron:.4f} roll={penalty_roll:.4f}") + print(f" neg_g={penalty_neg_g:.4f} rudder={penalty_rudder:.4f} stall={penalty_stall:.4f}") + print(f" curriculum={curriculum_enabled}, episodes_per_stage={episodes_per_stage}") + print(f" AIM CONE: start={aim_cone_start:.3f} end={aim_cone_end:.3f} anneal_eps={aim_anneal_episodes}") + + self._env_handles = [] + for env_num in range(num_envs): + handle = binding.env_init( + self.observations[env_num:(env_num+1)], + self.actions[env_num:(env_num+1)], + self.rewards[env_num:(env_num+1)], + self.terminals[env_num:(env_num+1)], + self.truncations[env_num:(env_num+1)], + env_num, + env_num=env_num, + report_interval=self.report_interval, + max_steps=max_steps, + obs_scheme=obs_scheme, + # Curriculum learning + curriculum_enabled=curriculum_enabled, + curriculum_randomize=curriculum_randomize, + episodes_per_stage=episodes_per_stage, + # Reward config (all sweepable) + reward_closing_scale=reward_closing_scale, + reward_tail_scale=reward_tail_scale, + reward_tracking=reward_tracking, + reward_firing_solution=reward_firing_solution, + penalty_stall=penalty_stall, + penalty_roll=penalty_roll, + penalty_neg_g=penalty_neg_g, + penalty_rudder=penalty_rudder, + penalty_aileron=penalty_aileron, + penalty_bias=penalty_bias, + reward_approach=reward_approach, + reward_level=reward_level, + alt_max=alt_max, + speed_min=speed_min, + # Aim cone annealing + aim_cone_start=aim_cone_start, + aim_cone_end=aim_cone_end, + aim_anneal_episodes=aim_anneal_episodes, + ) + self._env_handles.append(handle) + + self.c_envs = binding.vectorize(*self._env_handles) + + def reset(self, seed=None): + self.tick = 0 + binding.vec_reset(self.c_envs, seed if seed else 0) + return self.observations, [] + + def step(self, actions): + self.actions[:] = actions + + self.tick += 1 + binding.vec_step(self.c_envs) + + # Auto-render if render_mode is 'human' (Gymnasium convention) + if self.render_mode == 'human': + self.render() + if self.render_fps: + time.sleep(1.0 / self.render_fps) + + info = [] + if self.tick % self.report_interval == 0: + log_data = binding.vec_log(self.c_envs) + if log_data: + info.append(log_data) + + return (self.observations, self.rewards, self.terminals, self.truncations, info) + + def render(self): + binding.vec_render(self.c_envs, 0) + + def close(self): + binding.vec_close(self.c_envs) + + def force_state( + self, + env_idx=0, + player_pos=None, # (x, y, z) tuple, default (0, 0, 1000) + player_vel=None, # (vx, vy, vz) tuple, default (150, 0, 0) + player_ori=None, # (w, x, y, z) quaternion, default (1, 0, 0, 0) = wings level + player_throttle=1.0, # [0, 1], default full throttle + opponent_pos=None, # (x, y, z) or None for auto (400m ahead) + opponent_vel=None, # (vx, vy, vz) or None for auto (match player) + opponent_ori=None, # (w, x, y, z) or None for auto (match player) + tick=0, + ): + """ + Force exact game state for testing/debugging. 
+ + Usage: + env.force_state(player_pos=(-1500, 0, 1000), player_vel=(150, 0, 0)) + env.force_state(player_vel=(80, 0, 0)) # Just change velocity + """ + # Build kwargs for C binding + kwargs = {'tick': tick, 'p_throttle': player_throttle} + + # Player position + if player_pos is not None: + kwargs['p_px'], kwargs['p_py'], kwargs['p_pz'] = player_pos + + # Player velocity + if player_vel is not None: + kwargs['p_vx'], kwargs['p_vy'], kwargs['p_vz'] = player_vel + + # Player orientation + if player_ori is not None: + kwargs['p_ow'], kwargs['p_ox'], kwargs['p_oy'], kwargs['p_oz'] = player_ori + + # Opponent position (None = auto) + if opponent_pos is not None: + kwargs['o_px'], kwargs['o_py'], kwargs['o_pz'] = opponent_pos + + # Opponent velocity (None = auto) + if opponent_vel is not None: + kwargs['o_vx'], kwargs['o_vy'], kwargs['o_vz'] = opponent_vel + + # Opponent orientation (None = auto) + if opponent_ori is not None: + kwargs['o_ow'], kwargs['o_ox'], kwargs['o_oy'], kwargs['o_oz'] = opponent_ori + + # Call C binding with the specific env handle + binding.env_force_state(self._env_handles[env_idx], **kwargs) + + def get_state(self, env_idx=0): + """ + Get raw player state (independent of observation scheme). + + Returns dict with keys: + px, py, pz: Position + vx, vy, vz: Velocity + ow, ox, oy, oz: Orientation quaternion + up_x, up_y, up_z: Up vector (derived from quaternion) + fwd_x, fwd_y, fwd_z: Forward vector (derived from quaternion) + throttle: Current throttle + + Useful for physics tests that need exact state regardless of obs_scheme. + """ + return binding.env_get_state(self._env_handles[env_idx]) + + def set_autopilot( + self, + env_idx=0, + mode=AutopilotMode.STRAIGHT, + throttle=1.0, + bank_deg=30.0, + climb_rate=5.0, + ): + """ + Set autopilot mode for opponent aircraft. + + Args: + env_idx: Environment index, or None for all environments + mode: AutopilotMode constant (STRAIGHT, LEVEL, TURN_LEFT, etc.) + throttle: Target throttle [0, 1] + bank_deg: Bank angle for turn modes (degrees) + climb_rate: Target vertical velocity for climb/descend (m/s) + + Usage: + env.set_autopilot(mode=AutopilotMode.LEVEL) # Level flight, env 0 + env.set_autopilot(mode=AutopilotMode.TURN_RIGHT, bank_deg=45) # 45° right turn + env.set_autopilot(mode=AutopilotMode.RANDOM) # Randomize each episode + env.set_autopilot(env_idx=None, mode=AutopilotMode.RANDOM) # All envs + """ + if env_idx is None: + # Vectorized: set all envs at once + binding.vec_set_autopilot( + self.c_envs, + mode=mode, + throttle=throttle, + bank_deg=bank_deg, + climb_rate=climb_rate, + ) + else: + # Single env + binding.env_set_autopilot( + self._env_handles[env_idx], + mode=mode, + throttle=throttle, + bank_deg=bank_deg, + climb_rate=climb_rate, + ) + + def set_mode_weights(self, level=0.2, turn_left=0.2, turn_right=0.2, + climb=0.2, descend=0.2): + """ + Set probability weights for AP_RANDOM mode selection. + + Weights should sum to 1.0. Used for curriculum learning to bias + toward easier modes (e.g., LEVEL, STRAIGHT turns) early in training. 
+ + Args: + level: Weight for AP_LEVEL (maintain altitude) + turn_left: Weight for AP_TURN_LEFT + turn_right: Weight for AP_TURN_RIGHT + climb: Weight for AP_CLIMB + descend: Weight for AP_DESCEND + """ + binding.vec_set_mode_weights( + self.c_envs, + level=level, turn_left=turn_left, turn_right=turn_right, + climb=climb, descend=descend, + ) + + def get_autopilot_mode(self, env_idx=0): + """Get current autopilot mode for an environment (for testing/debugging).""" + return binding.env_get_autopilot_mode(self._env_handles[env_idx]) + + +def test_performance(timeout=10, atn_cache=1024): + env = Dogfight(num_envs=1000) + env.reset() + tick = 0 + + actions = [env.action_space.sample() for _ in range(atn_cache)] + + import time + start = time.time() + while time.time() - start < timeout: + atn = actions[tick % atn_cache] + env.step(atn) + tick += 1 + + print(f"SPS: {env.num_agents * tick / (time.time() - start)}") + + +if __name__ == "__main__": + test_performance() diff --git a/pufferlib/ocean/dogfight/dogfight_test.c b/pufferlib/ocean/dogfight/dogfight_test.c new file mode 100644 index 000000000..7e1b88527 --- /dev/null +++ b/pufferlib/ocean/dogfight/dogfight_test.c @@ -0,0 +1,1428 @@ +#include +#include +#include +#include "dogfight.h" + +#define ASSERT_NEAR(a, b, eps) assert(fabs((a) - (b)) < (eps)) + +static float obs_buf[32]; // Enough for current and future obs +static float act_buf[5]; +static float rew_buf[1]; +static unsigned char term_buf[1]; + +static Dogfight make_env(int max_steps) { + Dogfight env = {0}; + env.observations = obs_buf; + env.actions = act_buf; + env.rewards = rew_buf; + env.terminals = term_buf; + env.max_steps = max_steps; + // Default reward config + RewardConfig rcfg = { + .closing_scale = 0.002f, .tail_scale = 0.005f, + .tracking = 0.05f, .firing_solution = 0.1f, + .stall = 0.002f, .roll = 0.0001f, + .neg_g = 0.0005f, .rudder = 0.0002f, + .alt_max = 2500.0f, .speed_min = 50.0f, + }; + init(&env, 0, &rcfg, 0, 0, 15000, 0.35f, 0.087f, 50000, 0); // curriculum_enabled=0, aim_cone defaults + return env; +} + +void test_vec3_math() { + Vec3 a = vec3(1, 2, 3); + Vec3 b = vec3(4, 5, 6); + + Vec3 sum = add3(a, b); + assert(sum.x == 5 && sum.y == 7 && sum.z == 9); + + Vec3 diff = sub3(b, a); + assert(diff.x == 3 && diff.y == 3 && diff.z == 3); + + Vec3 scaled = mul3(a, 2); + assert(scaled.x == 2 && scaled.y == 4 && scaled.z == 6); + + float d = dot3(a, b); + assert(d == 32); // 1*4 + 2*5 + 3*6 = 32 + + ASSERT_NEAR(norm3(vec3(3, 4, 0)), 5.0f, 1e-6f); + + printf("test_vec3_math PASS\n"); +} + +void test_quat_math() { + Quat identity = quat(1, 0, 0, 0); + Vec3 v = vec3(1, 0, 0); + Vec3 rotated = quat_rotate(identity, v); + ASSERT_NEAR(rotated.x, 1.0f, 1e-6f); + ASSERT_NEAR(rotated.y, 0.0f, 1e-6f); + ASSERT_NEAR(rotated.z, 0.0f, 1e-6f); + + // 90 degree rotation around Z axis + Quat rot_z = quat_from_axis_angle(vec3(0, 0, 1), PI / 2); + Vec3 v2 = quat_rotate(rot_z, vec3(1, 0, 0)); + ASSERT_NEAR(v2.x, 0.0f, 1e-5f); + ASSERT_NEAR(v2.y, 1.0f, 1e-5f); + ASSERT_NEAR(v2.z, 0.0f, 1e-5f); + + printf("test_quat_math PASS\n"); +} + +void test_init() { + Dogfight env = make_env(1000); + assert(env.tick == 0); + assert(env.episode_return == 0.0f); + assert(env.log.n == 0.0f); + assert(env.client == NULL); + printf("test_init PASS\n"); +} + +void test_reset_plane() { + Plane p; + Vec3 pos = vec3(100, 200, 300); + Vec3 vel = vec3(80, 0, 0); + reset_plane(&p, pos, vel); + + assert(p.pos.x == 100 && p.pos.y == 200 && p.pos.z == 300); + assert(p.vel.x == 80 && p.vel.y == 0 && p.vel.z == 0); 
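+    // reset_plane should also leave the orientation at identity and the throttle at 50%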
+ assert(p.ori.w == 1 && p.ori.x == 0 && p.ori.y == 0 && p.ori.z == 0); + assert(p.throttle == 0.5f); + + printf("test_reset_plane PASS\n"); +} + +void test_c_reset() { + Dogfight env = make_env(1000); + c_reset(&env); + + assert(env.tick == 0); + assert(env.episode_return == 0.0f); + + // Player spawned in bounds + assert(env.player.pos.x >= -500 && env.player.pos.x <= 500); + assert(env.player.pos.y >= -500 && env.player.pos.y <= 500); + assert(env.player.pos.z >= 500 && env.player.pos.z <= 1500); + + // Velocity set + assert(env.player.vel.x == 80); + + printf("test_c_reset PASS\n"); +} + +void test_compute_observations() { + // Tests ANGLES scheme (scheme 0, 12 obs) + Dogfight env = make_env(1000); + env.player.pos = vec3(1000, 500, 1500); + env.player.vel = vec3(125, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // identity = facing +X, level + + compute_observations(&env); + + // ANGLES scheme layout: + // [0-2] pos normalized + ASSERT_NEAR(env.observations[0], 1000.0f / WORLD_HALF_X, 1e-6f); + ASSERT_NEAR(env.observations[1], 500.0f / WORLD_HALF_Y, 1e-6f); + ASSERT_NEAR(env.observations[2], 1500.0f / WORLD_MAX_Z, 1e-6f); + + // [3] speed normalized (scalar) + ASSERT_NEAR(env.observations[3], 125.0f / MAX_SPEED, 1e-6f); + + // [4-6] euler angles (all 0 for identity quaternion) + ASSERT_NEAR(env.observations[4], 0.0f, 1e-5f); // pitch / PI + ASSERT_NEAR(env.observations[5], 0.0f, 1e-5f); // roll / PI + ASSERT_NEAR(env.observations[6], 0.0f, 1e-5f); // yaw / PI + + // [7-11] target angles - depend on opponent position, check valid ranges + assert(env.observations[7] >= -1.0f && env.observations[7] <= 1.0f); // azimuth + assert(env.observations[8] >= -1.0f && env.observations[8] <= 1.0f); // elevation + assert(env.observations[9] >= -2.0f && env.observations[9] <= 2.0f); // distance + assert(env.observations[10] >= -1.0f && env.observations[10] <= 1.0f); // closing_rate + assert(env.observations[11] >= -1.0f && env.observations[11] <= 1.0f); // opp_heading + + printf("test_compute_observations PASS\n"); +} + +void test_c_step_moves_forward() { + Dogfight env = make_env(1000); + c_reset(&env); + + float initial_x = env.player.pos.x; + + // Set neutral actions for stable flight + env.actions[0] = 0.5f; // moderate throttle + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + c_step(&env); + + // With physics, plane should still move forward (roughly) + assert(env.player.pos.x > initial_x); + assert(env.tick == 1); + + printf("test_c_step_moves_forward PASS\n"); +} + +void test_oob_terminates() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place plane just past boundary + env.player.pos = vec3(WORLD_HALF_X + 1, 0, 1000); + env.player.vel = vec3(80, 0, 0); + + c_step(&env); + + assert(env.terminals[0] == 1); + assert(env.log.n == 1.0f); // Episode logged + + printf("test_oob_terminates PASS\n"); +} + +void test_max_steps_terminates() { + Dogfight env = make_env(5); + c_reset(&env); + + // Place plane in center, won't go OOB + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(10, 0, 0); // Slow + + for (int i = 0; i < 4; i++) { + c_step(&env); + assert(env.terminals[0] == 0); + } + + c_step(&env); // Step 5 should terminate + assert(env.terminals[0] == 1); + + printf("test_max_steps_terminates PASS\n"); +} + +void test_supersonic_terminates() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place player at 400 m/s (> 340 m/s limit) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(400, 0, 0); // Supersonic! 
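+    // 400 m/s is above the ~340 m/s speed ceiling (roughly Mach 1 at sea level), so this step should terminate with reward -1.0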
+ env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(80, 0, 0); + + c_step(&env); + + assert(env.terminals[0] == 1); + assert(env.rewards[0] == -1.0f); + + printf("test_supersonic_terminates PASS\n"); +} + +// Phase 2 tests + +void test_opponent_spawns() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Opponent should exist and be ahead of player + float dx = env.opponent.pos.x - env.player.pos.x; + assert(dx >= 200 && dx <= 500); + assert(env.opponent.vel.x == 80); + + printf("test_opponent_spawns PASS\n"); +} + +void test_relative_observations() { + // Tests ANGLES scheme relative target info (azimuth, elevation, distance) + Dogfight env = make_env(1000); + c_reset(&env); + + // Place planes at known positions + // Player at origin facing +X, opponent directly ahead and slightly right/up + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(80, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // identity = facing +X + env.opponent.pos = vec3(500, 100, 1050); // 500m ahead, 100m right, 50m up + env.opponent.vel = vec3(80, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + + compute_observations(&env); + + // ANGLES scheme: relative position encoded as azimuth [7] and elevation [8] + // rel_pos in body frame = (500, 100, 50) since identity orientation + // azimuth = atan2(100, 500) / PI ≈ 0.063 + // elevation = atan2(50, sqrt(500^2+100^2)) / (PI/2) ≈ 0.062 + float azimuth = env.observations[7]; + float elevation = env.observations[8]; + float distance = env.observations[9]; + + // Azimuth should be small positive (opponent slightly right) + float expected_az = atan2f(100.0f, 500.0f) / PI; // ~0.063 + ASSERT_NEAR(azimuth, expected_az, 1e-4f); + + // Elevation should be small positive (opponent slightly above) + float r_horiz = sqrtf(500*500 + 100*100); + float expected_el = atan2f(50.0f, r_horiz) / (PI * 0.5f); // ~0.062 + ASSERT_NEAR(elevation, expected_el, 1e-4f); + + // Distance: sqrt(500^2 + 100^2 + 50^2) ≈ 512m, normalized + float dist = sqrtf(500*500 + 100*100 + 50*50); + float expected_dist = clampf(dist / GUN_RANGE, 0.0f, 4.0f) - 2.0f; + ASSERT_NEAR(distance, expected_dist, 1e-4f); + + printf("test_relative_observations PASS\n"); +} + +void test_pursuit_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place opponent far away + env.player.pos = vec3(0, 0, 1000); + env.opponent.pos = vec3(1000, 0, 1000); + + c_step(&env); + float reward_far = env.rewards[0]; + + // Place opponent close + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.opponent.pos = vec3(100, 0, 1000); + + c_step(&env); + float reward_close = env.rewards[0]; + + // Closer should give better (less negative) reward + assert(reward_close > reward_far); + + printf("test_pursuit_reward PASS\n"); +} + +// Phase 3 tests + +void test_aircraft_params() { + // Check that aircraft parameters are defined with reasonable values + assert(MASS > 0 && MASS < 10000); // kg, WW2 fighter ~2500-4000kg + assert(WING_AREA > 0 && WING_AREA < 100); // m², WW2 fighter ~15-25m² + assert(C_D0 > 0 && C_D0 < 0.1); // parasitic drag coef + assert(K > 0 && K < 0.5); // induced drag factor + assert(C_L_MAX > 0 && C_L_MAX < 2.0); // max lift coef + assert(C_L_ALPHA > 0 && C_L_ALPHA < 10); // lift slope ~5.7/rad + assert(ENGINE_POWER > 0); // watts + assert(GRAVITY > 9 && GRAVITY < 10); // m/s² + + printf("test_aircraft_params PASS\n"); +} + +void test_throttle_accelerates() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place plane level, flying 
forward + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + float speed_before = norm3(env.player.vel); + + // Full throttle + env.actions[0] = 1.0f; // throttle + env.actions[1] = 0.0f; // elevator + env.actions[2] = 0.0f; // ailerons + env.actions[3] = 0.0f; // rudder + + for (int i = 0; i < 50; i++) c_step(&env); + + float speed_after = norm3(env.player.vel); + + // With thrust, should accelerate (or at least maintain speed against drag) + assert(speed_after >= speed_before * 0.9f); + + printf("test_throttle_accelerates PASS\n"); +} + +void test_plane_falls_without_lift() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place plane with no forward velocity (stalled) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(0, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + float z_before = env.player.pos.z; + + // Zero throttle + env.actions[0] = -1.0f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + for (int i = 0; i < 50; i++) c_step(&env); + + float z_after = env.player.pos.z; + + // Should fall due to gravity + assert(z_after < z_before); + // Should have fallen at least 0.5 * g * t² ≈ 0.5 * 10 * 1² = 5m in 1 sec + assert(z_before - z_after > 3.0f); + + printf("test_plane_falls_without_lift PASS\n"); +} + +void test_controls_affect_orientation() { + Dogfight env = make_env(1000); + + // Test pitch (elevator) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + Quat ori_before = env.player.ori; + + env.actions[0] = 0.0f; + env.actions[1] = 1.0f; // full elevator (pitch) + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + for (int i = 0; i < 10; i++) c_step(&env); + + // Orientation should have changed + float dot = ori_before.w * env.player.ori.w + + ori_before.x * env.player.ori.x + + ori_before.y * env.player.ori.y + + ori_before.z * env.player.ori.z; + assert(fabsf(dot) < 0.999f); // not identical + + // Test roll (ailerons) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + ori_before = env.player.ori; + + env.actions[0] = 0.0f; + env.actions[1] = 0.0f; + env.actions[2] = 1.0f; // full ailerons (roll) + env.actions[3] = 0.0f; + + for (int i = 0; i < 10; i++) c_step(&env); + + dot = ori_before.w * env.player.ori.w + + ori_before.x * env.player.ori.x + + ori_before.y * env.player.ori.y + + ori_before.z * env.player.ori.z; + assert(fabsf(dot) < 0.999f); + + printf("test_controls_affect_orientation PASS\n"); +} + +void test_dynamic_pressure() { + // q = 0.5 * rho * V² + // At V=100 m/s: q = 0.5 * 1.225 * 10000 = 6125 Pa + float V = 100.0f; + float q = 0.5f * 1.225f * V * V; + ASSERT_NEAR(q, 6125.0f, 1.0f); + + printf("test_dynamic_pressure PASS\n"); +} + +void test_lift_opposes_gravity() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Fly level at cruise speed with moderate throttle + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(120, 0, 0); // ~270 mph, reasonable cruise + env.player.ori = quat(1, 0, 0, 0); + + float z_before = env.player.pos.z; + + // Moderate throttle to maintain speed + env.actions[0] = 0.5f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + // Run for 1 second (50 steps at 0.02s) + for (int i = 0; i < 50; i++) c_step(&env); + + float z_after = env.player.pos.z; + + // With lift, altitude change should be much less than free fall + // Free fall: 0.5 * g 
* t² = 0.5 * 10 * 1 = 5m + // With lift: should lose less than 5m (ideally close to 0) + float dz = fabsf(z_after - z_before); + assert(dz < 50.0f); // generous tolerance for now + + printf("test_lift_opposes_gravity PASS\n"); +} + +void test_drag_slows_plane() { + Dogfight env = make_env(1000); + c_reset(&env); + + // High speed, zero throttle + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(200, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + float speed_before = norm3(env.player.vel); + + env.actions[0] = -1.0f; // zero throttle + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + for (int i = 0; i < 100; i++) c_step(&env); + + float speed_after = norm3(env.player.vel); + + // Drag should slow the plane + assert(speed_after < speed_before); + + printf("test_drag_slows_plane PASS\n"); +} + +void test_stall_clamps_lift() { + // Verify C_L clamping actually happens in physics + // A plane pointed straight up (90° alpha) should not get infinite lift + Dogfight env = make_env(1000); + c_reset(&env); + + // Flying forward at speed + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + // Point nose straight up (90° pitch) + env.player.ori = quat_from_axis_angle(vec3(0, 1, 0), PI / 2); + + env.actions[0] = 0.5f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + float z_before = env.player.pos.z; + for (int i = 0; i < 25; i++) c_step(&env); + float z_after = env.player.pos.z; + + // With stall limiting, plane should NOT shoot up massively + // Max C_L = 1.4, at 100 m/s: L = 1.4 * 6125 * 22 = 188,650 N + // That's ~6.4g, so it can climb but not infinitely + // Should still fall or climb modestly, not go to space + assert(z_after - z_before < 500.0f); // reasonable bound + + printf("test_stall_clamps_lift PASS\n"); +} + +void test_glimit_clamps_acceleration() { + // Verify g-limit actually clamps extreme forces + Dogfight env = make_env(1000); + c_reset(&env); + + // High speed for lots of lift potential + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(200, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + // Full back stick to pitch up hard + env.actions[0] = 1.0f; + env.actions[1] = 1.0f; // full pitch + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + for (int i = 0; i < 10; i++) c_step(&env); + + // Check that acceleration is bounded + // At 200 m/s, dynamic pressure is huge, but g-limit should cap it + // After pulling, vertical velocity should exist but not be insane + float vz = env.player.vel.z; + // At 8g for 0.2s: delta_v = 8 * 9.81 * 0.2 = ~16 m/s max vertical + assert(fabsf(vz) < 200.0f); // sanity check + + printf("test_glimit_clamps_acceleration PASS\n"); +} + +void test_forces_sum_correctly() { + // Test 3.17: verify all forces contribute to motion + Dogfight env = make_env(1000); + c_reset(&env); + + // Level flight at moderate speed + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + // No throttle - should slow down (drag) and fall (gravity > lift at zero alpha) + env.actions[0] = -1.0f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + float vx_before = env.player.vel.x; + float z_before = env.player.pos.z; + + for (int i = 0; i < 50; i++) c_step(&env); + + // Drag slows forward motion + assert(env.player.vel.x < vx_before); + // Gravity pulls down (at zero alpha, minimal lift) + assert(env.player.pos.z < z_before); + + printf("test_forces_sum_correctly PASS\n"); +} + +void 
test_integration_updates_state() { + // Test 3.18: verify Euler integration updates pos and vel + Dogfight env = make_env(1000); + c_reset(&env); + + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + + Vec3 pos_before = env.player.pos; + Vec3 vel_before = env.player.vel; + + env.actions[0] = 0.5f; + env.actions[1] = 0.0f; + env.actions[2] = 0.0f; + env.actions[3] = 0.0f; + + c_step(&env); + + // Position should change (velocity integration) + assert(env.player.pos.x != pos_before.x || + env.player.pos.y != pos_before.y || + env.player.pos.z != pos_before.z); + + // Velocity should change (force integration) + assert(env.player.vel.x != vel_before.x || + env.player.vel.y != vel_before.y || + env.player.vel.z != vel_before.z); + + printf("test_integration_updates_state PASS\n"); +} + +// Phase 3.5 tests + +void test_closing_velocity_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Scenario 1: Player approaching opponent (closing) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); // Moving toward opponent + env.opponent.pos = vec3(500, 0, 1000); + env.opponent.vel = vec3(50, 0, 0); // Moving slower + + c_step(&env); + float reward_closing = env.rewards[0]; + + // Scenario 2: Player moving away from opponent (opening) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(-100, 0, 0); // Moving away from opponent + env.opponent.pos = vec3(500, 0, 1000); + env.opponent.vel = vec3(50, 0, 0); + + c_step(&env); + float reward_opening = env.rewards[0]; + + // Closing should give better reward than opening + assert(reward_closing > reward_opening); + + printf("test_closing_velocity_reward PASS\n"); +} + +void test_tail_position_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Scenario 1: Player behind opponent (good position) + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // Facing +X + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); // Opponent also facing +X (player behind) + + c_step(&env); + float reward_behind = env.rewards[0]; + + // Scenario 2: Player in front of opponent (bad position) + c_reset(&env); + env.player.pos = vec3(300, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(0, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); // Opponent facing player (player in front) + + c_step(&env); + float reward_front = env.rewards[0]; + + // Being behind should give better reward + assert(reward_behind > reward_front); + + printf("test_tail_position_reward PASS\n"); +} + +void test_speed_penalty() { + Dogfight env = make_env(1000); + + // Scenario 1: Good speed (100 m/s) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + + c_step(&env); + float reward_good_speed = env.rewards[0]; + + // Scenario 2: Too slow (20 m/s - stall risk) + c_reset(&env); + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(20, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + + c_step(&env); + float reward_slow = env.rewards[0]; + + // Good speed should have better reward + assert(reward_good_speed > reward_slow); + + printf("test_speed_penalty PASS\n"); +} + +// Phase 4: Rendering tests (camera math only, no actual drawing) +void test_chase_camera_behind_player() { + 
// Test that camera is positioned behind player based on orientation + Dogfight env = make_env(1000); + c_reset(&env); + + // Set player at origin, facing +X + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); // identity = facing +X + + // Calculate camera position (same logic as c_render) + Vec3 fwd = quat_rotate(env.player.ori, vec3(1, 0, 0)); + float dist = 80.0f; // default cam_distance + float el = 0.3f; // default cam_elevation + float az = 0.0f; // default cam_azimuth + + float cam_x = env.player.pos.x - fwd.x * dist * cosf(el) * cosf(az) + fwd.y * dist * sinf(az); + float cam_y = env.player.pos.y - fwd.y * dist * cosf(el) * cosf(az) - fwd.x * dist * sinf(az); + float cam_z = env.player.pos.z + dist * sinf(el) + 20.0f; + + // Camera should be behind player (negative X direction) and above + assert(cam_x < env.player.pos.x); // Behind + assert(cam_z > env.player.pos.z); // Above + ASSERT_NEAR(cam_y, 0.0f, 1.0f); // Same Y (player at Y=0) + + printf("test_chase_camera_behind_player PASS\n"); +} + +void test_camera_orbit_updates() { + // Test camera orbit math with different azimuth/elevation + Dogfight env = make_env(1000); + c_reset(&env); + + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + + Vec3 fwd = quat_rotate(env.player.ori, vec3(1, 0, 0)); + float dist = 80.0f; + + // Test with azimuth rotation (looking from side) + float az = PI / 2.0f; // 90 degrees + float el = 0.3f; + + float cam_x = env.player.pos.x - fwd.x * dist * cosf(el) * cosf(az) + fwd.y * dist * sinf(az); + float cam_y = env.player.pos.y - fwd.y * dist * cosf(el) * cosf(az) - fwd.x * dist * sinf(az); + + // With 90 degree azimuth, camera should be to the side (negative Y) + assert(cam_y < -30.0f); // Significantly to the side + + // Test elevation change + float el_high = 1.2f; // Looking from above + float cam_z_high = env.player.pos.z + dist * sinf(el_high) + 20.0f; + float cam_z_low = env.player.pos.z + dist * sinf(0.1f) + 20.0f; + + assert(cam_z_high > cam_z_low); // Higher elevation = higher camera + + printf("test_camera_orbit_updates PASS\n"); +} + +void test_client_struct_defaults() { + // Test that Client would be initialized with correct defaults + // (We can't actually test c_render without Raylib window, but we test the values) + float default_distance = 80.0f; + float default_azimuth = 0.0f; + float default_elevation = 0.3f; + + assert(default_distance > 30.0f && default_distance < 300.0f); + assert(default_elevation > -1.4f && default_elevation < 1.4f); + ASSERT_NEAR(default_azimuth, 0.0f, 0.01f); + + printf("test_client_struct_defaults PASS\n"); +} + +// Phase 5: Combat tests +void test_trigger_fires() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Set up player far from opponent (won't hit) + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.player.fire_cooldown = 0; + env.opponent.pos = vec3(1000, 0, 500); // Far away, won't hit + + env.actions[4] = 1.0f; // Trigger pulled + + // Step to process fire + c_step(&env); + + // Should have fired (cooldown set) + assert(env.player.fire_cooldown == FIRE_COOLDOWN); + assert(env.episode_shots_fired >= 1.0f); + + printf("test_trigger_fires PASS\n"); +} + +void test_fire_cooldown() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Set up player far from opponent (won't hit) + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(1000, 0, 500); // Far away, won't hit + + // Fire once + env.player.fire_cooldown = 0; + 
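+    // Hold the trigger: the first step fires and sets the cooldown, so the second step must not fire again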
env.actions[4] = 1.0f; + c_step(&env); + float shots_after_first = env.episode_shots_fired; + + // Try to fire again immediately (should be blocked by cooldown) + c_step(&env); + float shots_after_second = env.episode_shots_fired; + + // Should not have fired again (still on cooldown) + assert(shots_after_second == shots_after_first); + + printf("test_fire_cooldown PASS\n"); +} + +void test_cone_hit_detection() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Place player at origin facing +X + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); // Identity = facing +X + + // Place opponent directly ahead within range + env.opponent.pos = vec3(200, 0, 500); // 200m ahead, in cone + + assert(check_hit(&env.player, &env.opponent, env.cos_gun_cone) == true); + + // Place opponent too far + env.opponent.pos = vec3(600, 0, 500); // 600m > GUN_RANGE + assert(check_hit(&env.player, &env.opponent, env.cos_gun_cone) == false); + + // Place opponent at side (outside 5 degree cone) + env.opponent.pos = vec3(200, 50, 500); // ~14 degrees off-axis + assert(check_hit(&env.player, &env.opponent, env.cos_gun_cone) == false); + + // Place opponent slightly off-axis but within cone + env.opponent.pos = vec3(200, 10, 500); // ~2.8 degrees off-axis + assert(check_hit(&env.player, &env.opponent, env.cos_gun_cone) == true); + + printf("test_cone_hit_detection PASS\n"); +} + +void test_hit_reward() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Set up guaranteed hit + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.player.fire_cooldown = 0; + env.opponent.pos = vec3(200, 0, 500); // Directly ahead + + env.actions[4] = 1.0f; // Fire + + c_step(&env); + + // Kill = terminal with reward 1.0 + assert(env.terminals[0] == 1); + assert(env.rewards[0] == 1.0f); + + printf("test_hit_reward PASS\n"); +} + +void test_kill_terminates_episode() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Set up guaranteed hit + env.player.pos = vec3(0, 0, 500); + env.player.ori = quat(1, 0, 0, 0); + env.player.fire_cooldown = 0; + env.opponent.pos = vec3(200, 0, 500); + + env.actions[4] = 1.0f; + + c_step(&env); + + // Kill should terminate episode + assert(env.terminals[0] == 1); + + // Reward should be 1.0 (kill reward) + assert(env.rewards[0] == 1.0f); + + // Perf should be tracked (1 kill in 1 episode = 1.0) + assert(env.log.perf >= 1.0f); + assert(env.log.n >= 1.0f); + + printf("test_kill_terminates_episode PASS\n"); +} + +void test_combat_constants() { + // Verify combat constants are reasonable + assert(GUN_RANGE == 500.0f); + assert(GUN_CONE_ANGLE > 0.08f && GUN_CONE_ANGLE < 0.09f); // ~5 degrees + assert(FIRE_COOLDOWN == 10); + + printf("test_combat_constants PASS\n"); +} + +// Phase 3.6: Additional reward/penalty tests + +void test_roll_penalty() { + Dogfight env = make_env(1000); + c_reset(&env); + + // Neutral actions (don't fire!) 
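+    // actions[4] is the trigger; keeping it at -1 prevents an accidental kill reward from masking the roll penalty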
+ env.actions[0] = 0.0f; // throttle + env.actions[1] = 0.0f; // elevator + env.actions[2] = 0.0f; // ailerons + env.actions[3] = 0.0f; // rudder + env.actions[4] = -1.0f; // trigger (don't fire) + + // Place plane level, good altitude, opponent ahead + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // Wings level + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + + c_step(&env); + float reward_level = env.rewards[0]; + + // Now roll the plane to 90 degrees (pi/2 radians) + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat_from_axis_angle(vec3(1, 0, 0), PI / 2); // 90° roll + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + + c_step(&env); + float reward_rolled = env.rewards[0]; + + // Rolled should have worse reward due to roll penalty + assert(reward_level > reward_rolled); + + // Verify magnitude: at 90° (pi/2 rad) with roll=0.0001, penalty = 0.000157 + float expected_penalty = (PI / 2) * 0.0001f; + float actual_diff = reward_level - reward_rolled; + ASSERT_NEAR(actual_diff, expected_penalty, 0.0001f); + + printf("test_roll_penalty PASS\n"); +} + +void test_tracking_reward() { + Dogfight env = make_env(1000); + + // Scenario 1: Opponent in gunsight (aim angle < 45°) + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); // Facing +X + env.opponent.pos = vec3(300, 0, 1000); // Directly ahead (0° off-axis) + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + c_step(&env); + float reward_on_target = env.rewards[0]; + + // Scenario 2: Opponent far off-axis (aim angle > 45°, no tracking reward) + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(0, 300, 1000); // 90° to the side + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + c_step(&env); + float reward_off_target = env.rewards[0]; + + // On target should have better reward (tracking bonus) + assert(reward_on_target > reward_off_target); + + printf("test_tracking_reward PASS\n"); +} + +void test_firing_solution_reward() { + Dogfight env = make_env(1000); + + // Perfect firing solution: aim < 5°, dist < GUN_RANGE (500m) + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); // 300m ahead, in cone + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + c_step(&env); + float reward_solution = env.rewards[0]; + + // No firing solution: aim < 5° but dist > GUN_RANGE + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(600, 0, 1000); // 600m ahead, out of range + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + c_step(&env); + float reward_no_solution = env.rewards[0]; + + // Firing solution should give bonus + assert(reward_solution > reward_no_solution); + + printf("test_firing_solution_reward PASS\n"); +} + 
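+// --- Illustrative sketch only (not referenced by the tests in this file): the reward tests
+// above all follow the same pattern of building two one-step scenarios and comparing
+// env->rewards[0]. A helper along these lines could factor that out; the name
+// single_step_reward and its argument list are hypothetical, not part of the existing API.
+static float single_step_reward(Dogfight *env, Vec3 player_pos, Vec3 opp_pos) {
+    c_reset(env);
+    env->actions[0] = 0.0f;    // neutral throttle
+    env->actions[1] = 0.0f;    // neutral elevator
+    env->actions[2] = 0.0f;    // neutral ailerons
+    env->actions[3] = 0.0f;    // neutral rudder
+    env->actions[4] = -1.0f;   // trigger released so a kill can't mask shaping terms
+    env->player.pos = player_pos;
+    env->player.vel = vec3(100, 0, 0);
+    env->player.ori = quat(1, 0, 0, 0);   // wings level, facing +X
+    env->opponent.pos = opp_pos;
+    env->opponent.vel = vec3(100, 0, 0);
+    env->opponent.ori = quat(1, 0, 0, 0);
+    c_step(env);
+    return env->rewards[0];
+}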
+// Helper to make env with curriculum enabled +static Dogfight make_env_curriculum(int max_steps, int randomize) { + Dogfight env = {0}; + env.observations = obs_buf; + env.actions = act_buf; + env.rewards = rew_buf; + env.terminals = term_buf; + env.max_steps = max_steps; + RewardConfig rcfg = { + .closing_scale = 0.002f, .tail_scale = 0.005f, + .tracking = 0.05f, .firing_solution = 0.1f, + .stall = 0.002f, .roll = 0.0001f, + .neg_g = 0.0005f, .rudder = 0.0002f, + .alt_max = 2500.0f, .speed_min = 50.0f, + }; + init(&env, 0, &rcfg, 1, randomize, 15000, 0.35f, 0.087f, 50000, 0); // curriculum_enabled=1 + return env; +} + +// Helper to make env with custom roll penalty (for accumulation test) +static Dogfight make_env_with_roll_penalty(int max_steps, float roll_penalty) { + Dogfight env = {0}; + env.observations = obs_buf; + env.actions = act_buf; + env.rewards = rew_buf; + env.terminals = term_buf; + env.max_steps = max_steps; + RewardConfig rcfg = { + .closing_scale = 0.002f, .tail_scale = 0.005f, + .tracking = 0.05f, .firing_solution = 0.1f, + .stall = 0.002f, + .roll = roll_penalty, .neg_g = 0.0005f, .rudder = 0.0002f, + .alt_max = 2500.0f, .speed_min = 50.0f, + }; + init(&env, 0, &rcfg, 0, 0, 15000, 0.35f, 0.087f, 50000, 0); + return env; +} + +void test_roll_penalty_accumulates() { + // Test that constant rolling accumulates meaningful penalty over multiple steps + // Use exaggerated roll penalty (10x default) for visibility + Dogfight env = make_env_with_roll_penalty(1000, 0.001f); + c_reset(&env); + + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + + // Full aileron with moderate throttle to maintain flight + float total_reward = 0.0f; + for (int i = 0; i < 50; i++) { + env.actions[0] = 0.5f; // Moderate throttle + env.actions[1] = 0.0f; // Neutral elevator + env.actions[2] = 1.0f; // Full right aileron (constant roll) + env.actions[3] = 0.0f; // Neutral rudder + env.actions[4] = -1.0f; // No fire + c_step(&env); + total_reward += env.rewards[0]; + + // Refresh opponent position (so distance reward stays similar) + env.opponent.pos = vec3(env.player.pos.x + 300, env.player.pos.y, env.player.pos.z); + } + + // Compare to level flight: same scenario but wings level + Dogfight env2 = make_env_with_roll_penalty(1000, 0.001f); + c_reset(&env2); + + env2.player.pos = vec3(0, 0, 1000); + env2.player.vel = vec3(100, 0, 0); + env2.opponent.pos = vec3(300, 0, 1000); + + float total_reward_level = 0.0f; + for (int i = 0; i < 50; i++) { + env2.actions[0] = 0.5f; + env2.actions[1] = 0.0f; + env2.actions[2] = 0.0f; // NO aileron (stay level) + env2.actions[3] = 0.0f; + env2.actions[4] = -1.0f; + c_step(&env2); + total_reward_level += env2.rewards[0]; + + env2.opponent.pos = vec3(env2.player.pos.x + 300, env2.player.pos.y, env2.player.pos.z); + } + + // Rolling should accumulate worse reward than level flight + assert(total_reward < total_reward_level); + + printf("test_roll_penalty_accumulates PASS\n"); +} + +// Helper to get bearing from player to opponent (degrees, 0=ahead, 90=right, 180=behind) +static float get_bearing(Dogfight *env) { + Vec3 rel = sub3(env->opponent.pos, env->player.pos); + Vec3 player_fwd = quat_rotate(env->player.ori, vec3(1, 0, 0)); + float dot = dot3(normalize3(rel), player_fwd); + return acosf(clampf(dot, -1, 1)) * 180.0f / M_PI; +} + +// Helper to get opponent heading (degrees, 0=+X, 90=+Y) +static float get_opponent_heading(Dogfight *env) { + Vec3 opp_fwd = quat_rotate(env->opponent.ori, vec3(1, 0, 0)); 
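+    // Heading is the yaw of the opponent's body +X axis in the world frame: atan2(fwd.y, fwd.x), converted to degrees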
+ return atan2f(opp_fwd.y, opp_fwd.x) * 180.0f / M_PI; +} + +void test_spawn_bearing_variety() { + // Test that FULL_RANDOM stage spawns opponents at various bearings (not just ahead) + // Use progressive mode and set total_episodes high enough to reach FULL_RANDOM (stage 4) + Dogfight env = make_env_curriculum(1000, 0); // Progressive mode + env.total_episodes = env.episodes_per_stage * 4; // Force stage 4 (FULL_RANDOM) + + int front_count = 0; // bearing < 45 + int side_count = 0; // bearing 45-135 + int behind_count = 0; // bearing > 135 + + // Run many resets with different seeds + for (int seed = 0; seed < 100; seed++) { + srand(seed * 7 + 13); // Vary seed + c_reset(&env); + + // Verify we're in stage 4 (FULL_RANDOM after 2026-01-18 reorder) + assert(env.stage == CURRICULUM_FULL_RANDOM); + + float bearing = get_bearing(&env); + if (bearing < 45.0f) front_count++; + else if (bearing > 135.0f) behind_count++; + else side_count++; + } + + // With 360° spawning, we should see opponents in all directions + // Each sector should have at least some spawns (allow for randomness) + assert(front_count > 0); // Some in front + assert(side_count > 0); // Some to the side + assert(behind_count > 0); // Some behind (this is the key test!) + + printf("test_spawn_bearing_variety PASS (front=%d, side=%d, behind=%d)\n", + front_count, side_count, behind_count); +} + +void test_spawn_heading_variety() { + // Test that FULL_RANDOM opponents have varied headings (not always 0) + // Use progressive mode and set total_episodes high enough to reach FULL_RANDOM (stage 4) + Dogfight env = make_env_curriculum(1000, 0); // Progressive mode + env.total_episodes = env.episodes_per_stage * 4; // Force stage 4 (FULL_RANDOM) + + float min_heading = 999.0f; + float max_heading = -999.0f; + int varied_count = 0; // Count of headings not near 0 + + for (int seed = 0; seed < 50; seed++) { + srand(seed * 11 + 17); + c_reset(&env); + + // Verify we're in stage 4 (FULL_RANDOM after 2026-01-18 reorder) + assert(env.stage == CURRICULUM_FULL_RANDOM); + + float heading = get_opponent_heading(&env); + if (heading < min_heading) min_heading = heading; + if (heading > max_heading) max_heading = heading; + if (fabsf(heading) > 30.0f) varied_count++; // Not facing +X + } + + // Headings should vary across the full 360° range + float heading_range = max_heading - min_heading; + assert(heading_range > 90.0f); // At least 90° variation + assert(varied_count > 10); // At least some not facing default direction + + printf("test_spawn_heading_variety PASS (range=%.0f°, varied=%d)\n", + heading_range, varied_count); +} + +void test_curriculum_stages_differ() { + // Test that different curriculum stages produce different spawn patterns + // Use progressive mode and manipulate total_episodes to get desired stages + Dogfight env = make_env_curriculum(1000, 0); // Progressive mode (randomize=0) + + // Stage 0: TAIL_CHASE - opponent ahead, same direction + // total_episodes < episodes_per_stage gives stage 0 + env.total_episodes = 0; + srand(42); + c_reset(&env); + float bearing_tail = get_bearing(&env); + float heading_tail = get_opponent_heading(&env); + assert(env.stage == CURRICULUM_TAIL_CHASE); + + // Stage 1: HEAD_ON - opponent ahead, facing us + env.total_episodes = env.episodes_per_stage; // Stage 1 + srand(42); + c_reset(&env); + float bearing_head = get_bearing(&env); + assert(env.stage == CURRICULUM_HEAD_ON); + + // Stage 2: VERTICAL - opponent above/below (after 2026-01-18 reorder, was stage 3) + env.total_episodes = 
env.episodes_per_stage * 2; // Stage 2 + srand(42); + c_reset(&env); + float bearing_vert = get_bearing(&env); + assert(env.stage == CURRICULUM_VERTICAL); + + // Stage 6: CROSSING - opponent to side (after 2026-01-18 reorder, was stage 2) + env.total_episodes = env.episodes_per_stage * 6; // Stage 6 + srand(42); + c_reset(&env); + float bearing_cross = get_bearing(&env); + assert(env.stage == CURRICULUM_CROSSING); + + // TAIL_CHASE should have opponent nearly ahead (small bearing) + assert(bearing_tail < 30.0f); + + // HEAD_ON should have opponent ahead + assert(bearing_head < 30.0f); + + // VERTICAL should have opponent ahead (same heading, different altitude) + assert(bearing_vert < 45.0f); + + // CROSSING should have opponent more to the side (larger bearing) + assert(bearing_cross > 45.0f); + + // TAIL_CHASE opponent should face same direction as player (~0° heading) + assert(fabsf(heading_tail) < 30.0f); + + printf("test_curriculum_stages_differ PASS (tail=%.0f°, head=%.0f°, vert=%.0f°, cross=%.0f°)\n", + bearing_tail, bearing_head, bearing_vert, bearing_cross); +} + +void test_spawn_distance_range() { + // Test that spawn distances are within expected ranges + Dogfight env = make_env_curriculum(1000, 1); + + float min_dist = 9999.0f; + float max_dist = 0.0f; + + for (int seed = 0; seed < 50; seed++) { + srand(seed * 13 + 7); + c_reset(&env); + + Vec3 rel = sub3(env.opponent.pos, env.player.pos); + float dist = norm3(rel); + + if (dist < min_dist) min_dist = dist; + if (dist > max_dist) max_dist = dist; + } + + // Distances should be reasonable (200-700m typical range across all stages) + assert(min_dist > 100.0f); // Not too close + assert(max_dist < 800.0f); // Not too far + assert(max_dist - min_dist > 100.0f); // Some variety + + printf("test_spawn_distance_range PASS (min=%.0f, max=%.0f)\n", min_dist, max_dist); +} + +void test_neg_g_penalty() { + // Test that pushing forward (negative G) gets worse reward than pulling back + // Now uses actual g_force (not elevator position), so we need multiple steps + // for the G-force to develop from the maneuver + // + // We set a high neg_g penalty to ensure it dominates other reward differences + // caused by trajectory changes during the maneuver + Dogfight env = make_env(1000); + env.rcfg.neg_g = 0.5f; // High penalty to dominate other rewards + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + + // Pull back for multiple steps + env.player.pos = vec3(0, 0, 1500); + env.player.vel = vec3(150, 0, 0); // Fast enough for significant G + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(400, 0, 1500); + env.opponent.vel = vec3(150, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + + env.actions[1] = -0.5f; // Pull back (nose up, positive G) + float total_reward_pull = 0.0f; + float pull_g_sum = 0.0f; + for (int i = 0; i < 30; i++) { + c_step(&env); + total_reward_pull += env.rewards[0]; + pull_g_sum += env.player.g_force; + } + float pull_g_avg = pull_g_sum / 30.0f; + + // Push forward for multiple steps + c_reset(&env); + env.actions[4] = -1.0f; + env.player.pos = vec3(0, 0, 1500); + env.player.vel = vec3(150, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(400, 0, 1500); + env.opponent.vel = vec3(150, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + + env.actions[1] = 0.5f; // Push forward (nose down, negative G) + float total_reward_push = 0.0f; + float push_g_sum = 0.0f; + for (int i = 0; i < 30; i++) { + c_step(&env); + total_reward_push += env.rewards[0]; + push_g_sum += env.player.g_force; + } + 
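+    // Average over the 30-step push so a single transient G spike doesn't decide the assertion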
float push_g_avg = push_g_sum / 30.0f; + + // Verify G-forces are correct direction (pull = positive, push = negative) + assert(pull_g_avg > 1.0f); // Pull should give >1G + assert(push_g_avg < 0.5f); // Push should give <0.5G (triggering penalty) + + // Pull back should have better total reward (push fwd triggers neg_g penalty) + assert(total_reward_pull > total_reward_push); + printf("test_neg_g_penalty PASS (pull=%.4f > push=%.4f, pull_g=%.1f, push_g=%.1f)\n", + total_reward_pull, total_reward_push, pull_g_avg, push_g_avg); +} + +void test_rudder_penalty() { + // Test that no rudder gets better reward than full rudder + Dogfight env = make_env(1000); + c_reset(&env); + env.actions[4] = -1.0f; // Don't fire + + // No rudder + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + env.actions[3] = 0.0f; // No rudder + c_step(&env); + float reward_no_rudder = env.rewards[0]; + + // Full rudder + c_reset(&env); + env.actions[4] = -1.0f; + env.player.pos = vec3(0, 0, 1000); + env.player.vel = vec3(100, 0, 0); + env.player.ori = quat(1, 0, 0, 0); + env.opponent.pos = vec3(300, 0, 1000); + env.opponent.vel = vec3(100, 0, 0); + env.opponent.ori = quat(1, 0, 0, 0); + env.actions[3] = 1.0f; // Full rudder + c_step(&env); + float reward_rudder = env.rewards[0]; + + // No rudder should have better reward + assert(reward_no_rudder > reward_rudder); + printf("test_rudder_penalty PASS (no_rud=%.5f > rud=%.5f)\n", reward_no_rudder, reward_rudder); +} + +int main() { + printf("Running dogfight tests...\n\n"); + + // Phase 1 + test_vec3_math(); + test_quat_math(); + test_init(); + test_reset_plane(); + test_c_reset(); + test_compute_observations(); + test_c_step_moves_forward(); + test_oob_terminates(); + test_max_steps_terminates(); + test_supersonic_terminates(); + + // Phase 2 + test_opponent_spawns(); + test_relative_observations(); + test_pursuit_reward(); + + // Phase 3 + test_aircraft_params(); + test_throttle_accelerates(); + test_plane_falls_without_lift(); + test_controls_affect_orientation(); + test_dynamic_pressure(); + test_lift_opposes_gravity(); + test_drag_slows_plane(); + test_stall_clamps_lift(); + test_glimit_clamps_acceleration(); + test_forces_sum_correctly(); + test_integration_updates_state(); + + // Phase 3.5 + test_closing_velocity_reward(); + test_tail_position_reward(); + test_speed_penalty(); + + // Phase 4 + test_chase_camera_behind_player(); + test_camera_orbit_updates(); + test_client_struct_defaults(); + + // Phase 5 + test_trigger_fires(); + test_fire_cooldown(); + test_cone_hit_detection(); + test_hit_reward(); + test_kill_terminates_episode(); + test_combat_constants(); + + // Phase 5.5: Additional reward/penalty tests + test_roll_penalty(); + test_roll_penalty_accumulates(); + test_tracking_reward(); + test_firing_solution_reward(); + test_neg_g_penalty(); + test_rudder_penalty(); + + // Phase 6: Spawn variety tests + test_spawn_bearing_variety(); + test_spawn_heading_variety(); + test_curriculum_stages_differ(); + test_spawn_distance_range(); + + printf("\nAll 45 tests PASS\n"); + return 0; +} diff --git a/pufferlib/ocean/dogfight/flightlib.h b/pufferlib/ocean/dogfight/flightlib.h new file mode 100644 index 000000000..db523b8de --- /dev/null +++ b/pufferlib/ocean/dogfight/flightlib.h @@ -0,0 +1,422 @@ +// flightlib.h - Flight physics and simulation library for dogfight environment +// Modeled after 
dronelib.h pattern - self-contained physics simulation module +// +// Contains: +// - Math types (Vec3, Quat) and operations +// - Aircraft parameters (WW2 fighter class) +// - Plane struct (flight object state) +// - Physics functions (step_plane_with_physics, etc.) + +#ifndef FLIGHTLIB_H +#define FLIGHTLIB_H + +#include +#include +#include +#include + +// Allow DEBUG to be defined before including this header +#ifndef DEBUG +#define DEBUG 0 +#endif + +#ifndef PI +#define PI 3.14159265358979f +#endif + +// ============================================================================ +// MATH TYPES +// ============================================================================ + +typedef struct { float x, y, z; } Vec3; +typedef struct { float w, x, y, z; } Quat; + +// ============================================================================ +// MATH UTILITIES +// ============================================================================ + +static inline float clampf(float v, float lo, float hi) { + return v < lo ? lo : (v > hi ? hi : v); +} + +static inline float rndf(float a, float b) { + return a + ((float)rand() / (float)RAND_MAX) * (b - a); +} + +// --- Vec3 operations --- + +static inline Vec3 vec3(float x, float y, float z) { return (Vec3){x, y, z}; } +static inline Vec3 add3(Vec3 a, Vec3 b) { return (Vec3){a.x + b.x, a.y + b.y, a.z + b.z}; } +static inline Vec3 sub3(Vec3 a, Vec3 b) { return (Vec3){a.x - b.x, a.y - b.y, a.z - b.z}; } +static inline Vec3 mul3(Vec3 a, float s) { return (Vec3){a.x * s, a.y * s, a.z * s}; } +static inline float dot3(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; } +static inline float norm3(Vec3 a) { return sqrtf(dot3(a, a)); } + +static inline Vec3 normalize3(Vec3 v) { + float n = norm3(v); + if (n < 1e-8f) return vec3(0, 0, 0); + return mul3(v, 1.0f / n); +} + +static inline Vec3 cross3(Vec3 a, Vec3 b) { + return vec3( + a.y * b.z - a.z * b.y, + a.z * b.x - a.x * b.z, + a.x * b.y - a.y * b.x + ); +} + +// --- Quaternion operations --- + +static inline Quat quat(float w, float x, float y, float z) { return (Quat){w, x, y, z}; } + +static inline Quat quat_mul(Quat a, Quat b) { + return (Quat){ + a.w*b.w - a.x*b.x - a.y*b.y - a.z*b.z, + a.w*b.x + a.x*b.w + a.y*b.z - a.z*b.y, + a.w*b.y - a.x*b.z + a.y*b.w + a.z*b.x, + a.w*b.z + a.x*b.y - a.y*b.x + a.z*b.w + }; +} + +static inline void quat_normalize(Quat* q) { + float n = sqrtf(q->w*q->w + q->x*q->x + q->y*q->y + q->z*q->z); + if (n > 1e-8f) { + float inv = 1.0f / n; + q->w *= inv; q->x *= inv; q->y *= inv; q->z *= inv; + } +} + +static inline Vec3 quat_rotate(Quat q, Vec3 v) { + Quat qv = {0.0f, v.x, v.y, v.z}; + Quat q_conj = {q.w, -q.x, -q.y, -q.z}; + Quat tmp = quat_mul(q, qv); + Quat res = quat_mul(tmp, q_conj); + return (Vec3){res.x, res.y, res.z}; +} + +static inline Quat quat_from_axis_angle(Vec3 axis, float angle) { + float half = angle * 0.5f; + float s = sinf(half); + return (Quat){cosf(half), axis.x * s, axis.y * s, axis.z * s}; +} + +// ============================================================================ +// AIRCRAFT PARAMETERS - P-51D Mustang Reference +// ============================================================================ +// Based on P51d_REFERENCE_DATA.md - validated against historical data +// Test condition: 9,000 lb (4,082 kg) combat weight, sea level ISA +// +// THEORETICAL PERFORMANCE (P-51D targets): +// Max speed (SL, Military): 355 mph (159 m/s) +// Max speed (SL, WEP): 368 mph (164 m/s) +// Stall speed (clean): 100 mph (45 m/s) +// ROC (SL, 
Military): 3,030 ft/min (15.4 m/s) +// +// LIFT MODEL: +// C_L = C_L_alpha * (alpha + incidence - alpha_zero) +// The P-51D has a cambered airfoil (NAA 45-100) with alpha_zero = -1.2° +// Wing incidence is +1.5° relative to fuselage datum +// At 0° body pitch: effective AOA = 1.5° - (-1.2°) = 2.7°, C_L ~ 0.26 +// +// DRAG POLAR: Cd = Cd0 + K * Cl^2 +// - Cd0 = 0.0163 (P-51D published value, very clean laminar flow wing) +// - K = 0.072 = 1/(pi * e * AR) where e=0.75, AR=5.86 +// ============================================================================ + +#define MASS 4082.0f // kg (P-51D combat weight: 9,000 lb) +#define WING_AREA 21.65f // m^2 (P-51D: 233 ft^2) +#define C_D0 0.0163f // parasitic drag coefficient (P-51D laminar flow) +#define K 0.072f // induced drag factor: 1/(pi*0.75*5.86) +#define C_L_MAX 1.48f // max lift coefficient before stall (P-51D clean) +#define C_L_ALPHA 5.56f // lift curve slope (P-51D: 0.097/deg = 5.56/rad) +#define ALPHA_ZERO -0.021f // zero-lift angle (rad), -1.2° for cambered airfoil +#define WING_INCIDENCE 0.026f // wing incidence angle (rad), +1.5° (P-51D) +#define ENGINE_POWER 1112000.0f // watts (P-51D Military: 1,490 hp) +#define ETA_PROP 0.80f // propeller efficiency (P-51D cruise: 0.80-0.85) +#define GRAVITY 9.81f // m/s^2 +#define G_LIMIT_POS 6.0f // max positive G (pulling up) - pilot limit +#define G_LIMIT_NEG 1.5f // max negative G (pushing over) - blood to head is painful +#define RHO 1.225f // air density kg/m^3 (sea level ISA) + +// Inverse constants for faster computation (multiply instead of divide) +#define INV_MASS 0.000245f // 1/4082 +#define INV_GRAVITY 0.10197f // 1/9.81 +#define RAD_TO_DEG 57.2957795f // 180/PI + +#define MAX_PITCH_RATE 2.5f // rad/s +#define MAX_ROLL_RATE 3.0f // rad/s +#define MAX_YAW_RATE 0.50f // rad/s (~29 deg/s command, realistic ~7 deg/s achieved) + +// ============================================================================ +// PLANE STRUCT - Flight object state +// ============================================================================ + +typedef struct { + Vec3 pos; + Vec3 vel; + Vec3 prev_vel; // Previous velocity for acceleration calculation + Quat ori; + float throttle; + float g_force; // Current G-loading (for reward calculation) + int fire_cooldown; // Ticks until can fire again (0 = ready) +} Plane; + +// ============================================================================ +// PHYSICS FUNCTIONS +// ============================================================================ + +static inline void reset_plane(Plane *p, Vec3 pos, Vec3 vel) { + p->pos = pos; + p->vel = vel; + p->prev_vel = vel; // Initialize to current vel (no acceleration at start) + p->ori = quat(1, 0, 0, 0); + p->throttle = 0.5f; + p->g_force = 1.0f; // 1G at start (level flight) + p->fire_cooldown = 0; +} + +// ============================================================================ +// PHYSICS MODEL - step_plane_with_physics() +// ============================================================================ +// This implements a simplified 6-DOF flight model with: +// - Rate-based attitude control (not position control) +// - Point-mass aerodynamics (no moments/stability derivatives) +// - Propeller thrust model (T = P*eta/V, capped at static thrust) +// - Drag polar: Cd = Cd0 + K*Cl^2 +// - Wing incidence angle (built-in AOA for near-level cruise) +// +// COORDINATE SYSTEM: +// World frame: X=East, Y=North, Z=Up (right-handed, Z-up) +// Body frame: X=Forward (nose), Y=Right (wing), Z=Up (canopy) +// +// WING 
INCIDENCE: +// The wing is mounted at WING_INCIDENCE (~2 deg) relative to fuselage. +// Effective AOA for lift = body_alpha + WING_INCIDENCE +// This allows near-level flight at cruise speed with zero pitch input. +// +// REMAINING LIMITATIONS: +// - No pitching moment / static stability (Cm_alpha) +// - Rate-based controls (not position-based) +// - Symmetric stall model (real stall is asymmetric) +// ============================================================================ +static inline void step_plane_with_physics(Plane *p, float *actions, float dt) { + // Save previous velocity for acceleration calculation (v²/r) + p->prev_vel = p->vel; + + // ======================================================================== + // 1. BODY FRAME AXES (transform from body to world coordinates) + // ======================================================================== + // These are the aircraft's body axes expressed in world coordinates + Vec3 forward = quat_rotate(p->ori, vec3(1, 0, 0)); // Nose direction + Vec3 right = quat_rotate(p->ori, vec3(0, 1, 0)); // Right wing direction + Vec3 up = quat_rotate(p->ori, vec3(0, 0, 1)); // Canopy direction + + // ======================================================================== + // 2. CONTROL INPUTS -> ANGULAR RATES + // ======================================================================== + // Actions are [-1, 1], mapped to physical rates + // NOTE: These are RATE commands, not POSITION commands! + // Holding elevator=0.5 pitches DOWN continuously (standard joystick convention) + float throttle = (actions[0] + 1.0f) * 0.5f; // [-1,1] -> [0,1] + float pitch_rate = actions[1] * MAX_PITCH_RATE; // rad/s, + = nose down (push fwd) + float roll_rate = actions[2] * MAX_ROLL_RATE; // rad/s, + = roll right + float yaw_rate = actions[3] * MAX_YAW_RATE; // rad/s, + = nose right + + // ======================================================================== + // 3. ATTITUDE INTEGRATION (Quaternion kinematics) + // ======================================================================== + // q_dot = 0.5 * q * w where w is angular velocity in body frame + // This is the standard quaternion derivative formula + Vec3 omega_body = vec3(roll_rate, pitch_rate, yaw_rate); // body-frame w + Quat omega_quat = quat(0, omega_body.x, omega_body.y, omega_body.z); + Quat q_dot = quat_mul(p->ori, omega_quat); + p->ori.w += 0.5f * q_dot.w * dt; + p->ori.x += 0.5f * q_dot.x * dt; + p->ori.y += 0.5f * q_dot.y * dt; + p->ori.z += 0.5f * q_dot.z * dt; + quat_normalize(&p->ori); // Prevent drift from numerical integration + + // ======================================================================== + // 4. ANGLE OF ATTACK (AOA, a) + // ======================================================================== + // AOA = angle between velocity vector and body X-axis (nose) + // Positive a = nose above flight path = generating positive lift + // + // SIGN CONVENTION: + // If velocity has component opposite to body Z (up), nose is above + // flight path, so a is positive. + float V = norm3(p->vel); + if (V < 1.0f) V = 1.0f; // Prevent division by zero + + Vec3 vel_norm = normalize3(p->vel); + float cos_alpha = dot3(vel_norm, forward); + cos_alpha = clampf(cos_alpha, -1.0f, 1.0f); + float alpha = acosf(cos_alpha); // Always positive [0, pi] + + // Determine sign: positive when nose is ABOVE velocity vector + // If vel dot up < 0, velocity is "below" the body frame -> nose above -> a > 0 + float sign_alpha = (dot3(p->vel, up) < 0) ? 
1.0f : -1.0f;
+    alpha *= sign_alpha;
+
+    // ========================================================================
+    // 5. LIFT COEFFICIENT (Linear + Stall Clamp)
+    // ========================================================================
+    // C_L = C_L_alpha * (alpha + incidence - alpha_zero)
+    // For cambered airfoils, alpha_zero < 0 (generates lift at 0° AOA)
+    // P-51D NAA 45-100 airfoil: alpha_zero = -1.2°
+    //
+    // Effective AOA for lift = body_alpha + wing_incidence - alpha_zero
+    // At 0° body pitch: alpha_eff = 0 + 1.5° - (-1.2°) = 2.7°
+    // This gives C_L = 5.56 * 0.047 = 0.26, allowing near-level cruise
+    //
+    // The stall clamp engages at alpha_eff ~ 15° (C_L_MAX / C_L_ALPHA = 0.266 rad)
+    float alpha_effective = alpha + WING_INCIDENCE - ALPHA_ZERO;
+    float C_L = C_L_ALPHA * alpha_effective;
+    C_L = clampf(C_L, -C_L_MAX, C_L_MAX);  // Stall limiting (symmetric)
+
+    // ========================================================================
+    // 6. DYNAMIC PRESSURE
+    // ========================================================================
+    // q = 0.5*rho*V^2 [Pa or N/m^2]
+    // This is the "pressure" available for aerodynamic forces
+    // At 100 m/s: q = 0.5 * 1.225 * 10000 = 6,125 Pa
+    float q_dyn = 0.5f * RHO * V * V;
+
+    // ========================================================================
+    // 7. LIFT FORCE
+    // ========================================================================
+    // L = Cl * q * S [Newtons]
+    // For level flight: L = W = m*g = 4,082 * 9.81 ~ 40,040 N
+    // Required Cl at 100 m/s: Cl = 40040 / (6125 * 21.65) ~ 0.30
+    // Required alpha_eff = 0.30 / 5.56 ~ 0.054 rad ~ 3.1 deg
+    float L_mag = C_L * q_dyn * WING_AREA;
+
+    // ========================================================================
+    // 8. DRAG FORCE (Drag Polar)
+    // ========================================================================
+    // Cd = Cd0 + K * Cl^2
+    // Cd0 = parasitic drag (skin friction + form drag)
+    // K*Cl^2 = induced drag (vortex drag from lift generation)
+    //
+    // At cruise (Cl=0.30): Cd = 0.0163 + 0.072*0.090 ~ 0.023
+    // At Cl_max (Cl=1.48): Cd = 0.0163 + 0.072*2.19 ~ 0.174
+    float C_D = C_D0 + K * C_L * C_L;
+    float D_mag = C_D * q_dyn * WING_AREA;
+
+    // ========================================================================
+    // 9. THRUST FORCE (Propeller Model)
+    // ========================================================================
+    // Power-based: P = T * V -> T = P * eta / V
+    // At low speed, thrust is limited by static thrust capability
+    //
+    // At V=80 m/s, full throttle: T = 1,112,000 * 0.80 / 80 ~ 11,100 N
+    // At V=159 m/s (max speed):   T = 1,112,000 * 0.80 / 159 ~ 5,600 N ~ D
+    float P_avail = ENGINE_POWER * throttle;
+    float T_dynamic = (P_avail * ETA_PROP) / V;  // Thrust from power equation
+    float T_static = 0.3f * P_avail;             // Static thrust limit
+    float T_mag = fminf(T_static, T_dynamic);    // Can't exceed either limit
+
+    // ========================================================================
+    // 10.
FORCE DIRECTIONS (All in world frame) + // ======================================================================== + Vec3 drag_dir = mul3(vel_norm, -1.0f); // Opposite to velocity + Vec3 thrust_dir = forward; // Along body X-axis (nose) + + // Lift direction: perpendicular to velocity, in plane of velocity & wing + // lift_dir = vel x right, then normalized + // This ensures lift is perpendicular to V and perpendicular to span + Vec3 lift_dir = cross3(vel_norm, right); + float lift_dir_mag = norm3(lift_dir); + if (lift_dir_mag > 0.01f) { + lift_dir = mul3(lift_dir, 1.0f / lift_dir_mag); + } else { + lift_dir = up; // Fallback if velocity parallel to wing (rare) + } + + // ======================================================================== + // 11. WEIGHT (Gravity) + // ======================================================================== + Vec3 weight = vec3(0, 0, -MASS * GRAVITY); // Always -Z in world frame + + // ======================================================================== + // 12. SUM FORCES -> ACCELERATION + // ======================================================================== + Vec3 F_thrust = mul3(thrust_dir, T_mag); + Vec3 F_lift = mul3(lift_dir, L_mag); + Vec3 F_drag = mul3(drag_dir, D_mag); + + // Aerodynamic forces only (what pilot feels - "specific force") + // In level flight: lift ≈ weight, so F_aero_up ≈ m*g, giving g_force ≈ 1.0 + Vec3 F_aero = add3(F_thrust, add3(F_lift, F_drag)); + + // Body-up axis (perpendicular to wings, toward canopy) + Vec3 body_up = quat_rotate(p->ori, vec3(0, 0, 1)); + + // G-force = aero force along body-up / (mass * g) + // This is what the pilot feels (pushed into seat = positive G) + float g_force = dot3(F_aero, body_up) * INV_MASS * INV_GRAVITY; + + // Total force includes weight for actual physics + Vec3 F_total = add3(F_aero, weight); + + // ======================================================================== + // 13. G-LIMIT (Asymmetric for Positive/Negative G) + // ======================================================================== + // Pilots can handle much more positive G (blood to feet, 6G+) than + // negative G (blood to head, -1.5G is very uncomfortable). + // Limit the body-normal acceleration asymmetrically. + Vec3 accel = mul3(F_total, INV_MASS); + + // Asymmetric limits on felt G + if (g_force > G_LIMIT_POS) { + // Positive G exceeded - clamp + float excess = (g_force - G_LIMIT_POS) * GRAVITY; // Excess accel in m/s^2 + accel = sub3(accel, mul3(body_up, excess)); + g_force = G_LIMIT_POS; + } else if (g_force < -G_LIMIT_NEG) { + // Negative G exceeded - clamp (need to ADD acceleration along body_up) + float deficit = (-G_LIMIT_NEG - g_force) * GRAVITY; // How much to add back + accel = add3(accel, mul3(body_up, deficit)); // ADD, not subtract! + g_force = -G_LIMIT_NEG; + } + + if (DEBUG >= 10) printf("=== PHYSICS ===\n"); + if (DEBUG >= 10) printf("speed=%.1f m/s (stall~45, max~159 P-51D)\n", V); + if (DEBUG >= 10) printf("throttle=%.2f\n", throttle); + if (DEBUG >= 10) printf("alpha_body=%.2f deg, alpha_eff=%.2f deg (inc=%.1f, a0=%.1f), C_L=%.3f\n", + alpha * RAD_TO_DEG, alpha_effective * RAD_TO_DEG, + WING_INCIDENCE * RAD_TO_DEG, ALPHA_ZERO * RAD_TO_DEG, C_L); + if (DEBUG >= 10) printf("thrust=%.0f N, lift=%.0f N, drag=%.0f N, weight=%.0f N\n", T_mag, L_mag, D_mag, MASS * GRAVITY); + if (DEBUG >= 10) printf("g_force=%.2f g (limit=+%.1f/-%.1f)\n", g_force, G_LIMIT_POS, G_LIMIT_NEG); + + // ======================================================================== + // 14. 
INTEGRATION (Semi-implicit Euler) + // ======================================================================== + // v(t+dt) = v(t) + a * dt + // x(t+dt) = x(t) + v(t+dt) * dt (using NEW velocity) + p->vel = add3(p->vel, mul3(accel, dt)); + p->pos = add3(p->pos, mul3(p->vel, dt)); + + p->throttle = throttle; + p->g_force = g_force; // Store for reward calculation +} + +// Simple forward motion for opponent (no physics, just maintains heading) +static inline void step_plane(Plane *p, float dt) { + // Save previous velocity for acceleration calculation + p->prev_vel = p->vel; + + Vec3 forward = quat_rotate(p->ori, vec3(1, 0, 0)); + float speed = norm3(p->vel); + if (speed < 1.0f) speed = 80.0f; + p->vel = mul3(forward, speed); + p->pos = add3(p->pos, mul3(p->vel, dt)); + + if (DEBUG >= 10) printf("=== TARGET ===\n"); + if (DEBUG >= 10) printf("target_speed=%.1f m/s (expected=80)\n", speed); + if (DEBUG >= 10) printf("target_pos=(%.1f, %.1f, %.1f)\n", p->pos.x, p->pos.y, p->pos.z); + if (DEBUG >= 10) printf("target_fwd=(%.2f, %.2f, %.2f)\n", forward.x, forward.y, forward.z); +} + +#endif // FLIGHTLIB_H diff --git a/pufferlib/ocean/dogfight/physics_log.md b/pufferlib/ocean/dogfight/physics_log.md new file mode 100644 index 000000000..7daa0faaf --- /dev/null +++ b/pufferlib/ocean/dogfight/physics_log.md @@ -0,0 +1,25 @@ +# Physics Sanity Log + +Historical record of physics test results at specific commits. + +**P-51D Reference values** (from P51d_REFERENCE_DATA.md): +- Max speed: 159 m/s (355 mph, Military power, sea level) +- Stall speed: 45 m/s (100 mph, 9000 lb, clean config) + +--- + +## How to use + +1. Run tests: `cd pufferlib/ocean/dogfight && python test_flight.py` +2. If at a clean commit worth recording, add entry below +3. Include commit hash from `git rev-parse --short HEAD` + +--- + +## Results + +| Commit | Date | max_speed | stall | climb | L/D | turn_30 | turn_60 | pitch | roll | Notes | +|--------|------|-----------|-------|-------|-----|---------|---------|-------|------|-------| +| P-51D | ref | 159 | 45 | 15 | 14.6| - | - | UP | YES | Reference targets | +| 0116b97c | 2026-01-13 | 86.5 | 75.5 | -4.9 | - | - | - | UP | YES | Old tests, pre-physics fix | +| 1c30c546 | 2026-01-14 | 149.6 | 50 | 16.3 | 14.7 | 2.2 | 9.4 | UP | YES | Coordinated turn tests, 97% eff | diff --git a/pufferlib/ocean/dogfight/test_flight.py b/pufferlib/ocean/dogfight/test_flight.py new file mode 100644 index 000000000..75fb6af3a --- /dev/null +++ b/pufferlib/ocean/dogfight/test_flight.py @@ -0,0 +1,1507 @@ +""" +Physics validation tests for dogfight environment. +Uses force_state() to set exact initial conditions for accurate measurements. + +Run: python pufferlib/ocean/dogfight/test_flight.py + python pufferlib/ocean/dogfight/test_flight.py --render # with visualization + python pufferlib/ocean/dogfight/test_flight.py --render --test pitch_direction # single test + +TODO - FLIGHT PHYSICS TESTS NEEDED: +===================================== +1. RUDDER-ONLY TURN TEST (HIGH PRIORITY) + - Current MAX_YAW_RATE = 1.5 rad/s (86 deg/s) is WAY too high + - P-51D rudder should give ~5-15 deg/s yaw rate max, with significant sideslip + - Test: wings level, full rudder, measure actual yaw rate and heading change + - Compare against P-51D flight test data (see P51d_REFERENCE_DATA.md) + - Expected: rudder alone should NOT be effective for turning - need bank + +2. 
COORDINATED TURN TEST + - Bank to 30°, 45°, 60° and measure sustained turn rate + - P-51D should get ~17.5 deg/s at max sustained (corner velocity) + - Verify turn rate vs bank angle relationship + +3. ROLL RATE TEST + - Full aileron deflection, measure time to roll 90° and 360° + - P-51D: ~90-100 deg/s roll rate at 300 mph + +4. PITCH AUTHORITY TEST + - Full elevator, measure pitch rate and G-loading + - Should be speed-dependent (less authority at low speed) +""" +import argparse +import numpy as np +from dogfight import Dogfight, AutopilotMode + + +def parse_args(): + parser = argparse.ArgumentParser(description='P-51D Physics Validation Tests') + parser.add_argument('--render', action='store_true', help='Enable visual rendering') + parser.add_argument('--fps', type=int, default=50, help='Target FPS when rendering (default 50 = real-time, try 5-10 for slow-mo)') + parser.add_argument('--test', type=str, default=None, help='Run specific test only') + return parser.parse_args() + + +ARGS = parse_args() +RENDER_MODE = 'human' if ARGS.render else None +RENDER_FPS = ARGS.fps if ARGS.render else None + +# Constants (must match dogfight.h) +MAX_SPEED = 250.0 +WORLD_MAX_Z = 3000.0 + +# P-51D reference values (from P51d_REFERENCE_DATA.md) +P51D_MAX_SPEED = 159.0 # m/s (355 mph, Military power, SL) +P51D_STALL_SPEED = 45.0 # m/s (100 mph, 9000 lb, clean) +P51D_CLIMB_RATE = 15.4 # m/s (3030 ft/min, Military power) +P51D_TURN_RATE = 17.5 # deg/s at max sustained turn (DCS testing data) + +# PID values for level flight autopilot (found via pid_sweep.py) +# These give stable level flight with vz_std < 0.3 m/s +LEVEL_FLIGHT_KP = 0.001 # Proportional gain on vz error +LEVEL_FLIGHT_KD = 0.001 # Derivative gain (damping) + +RESULTS = {} + + +# ============================================================================= +# State accessor functions using get_state() (independent of obs_scheme) +# ============================================================================= + +def get_speed_from_state(env): + """Get total speed from raw state.""" + s = env.get_state() + return np.sqrt(s['vx']**2 + s['vy']**2 + s['vz']**2) + + +def get_vz_from_state(env): + """Get vertical velocity from raw state.""" + return env.get_state()['vz'] + + +def get_alt_from_state(env): + """Get altitude from raw state.""" + return env.get_state()['pz'] + + +def get_up_vector_from_state(env): + """Get up vector from raw state.""" + s = env.get_state() + return s['up_x'], s['up_y'], s['up_z'] + + +def get_velocity_from_state(env): + """Get velocity vector from raw state.""" + s = env.get_state() + return s['vx'], s['vy'], s['vz'] + + +def level_flight_pitch_from_state(env, kp=LEVEL_FLIGHT_KP, kd=LEVEL_FLIGHT_KD): + """ + PD autopilot for level flight (vz = 0). + Uses tuned PID values from pid_sweep.py for stable flight. 
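(Editorial sketch, not part of the test file: the raw-state helpers above compose into the derived quantities the tests report. The dict literal stands in for one entry of get_state(); the key names mirror the ones used elsewhere in this file.)

import numpy as np
state = {'vx': 120.0, 'vy': 0.0, 'vz': 3.0, 'pz': 1000.0}       # assumed sample values
speed = np.sqrt(state['vx']**2 + state['vy']**2 + state['vz']**2)
heading_deg = np.degrees(np.arctan2(state['vy'], state['vx']))  # 0 deg = flying +X
climb_deg = np.degrees(np.arcsin(state['vz'] / speed))          # positive = climbing
print(f"speed={speed:.1f} m/s, heading={heading_deg:.1f} deg, climb={climb_deg:.1f} deg")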
+ """ + vz = get_vz_from_state(env) + # Negative because: if climbing (vz>0), need nose down (negative elevator) + elevator = -kp * vz - kd * vz + return np.clip(elevator, -0.2, 0.2) + + +# ============================================================================= +# Legacy functions (use observations - for obs_scheme testing only) +# ============================================================================= + +def get_speed(obs): + """Get total speed from observation (LEGACY - assumes WORLD_FRAME).""" + vx = obs[0, 3] * MAX_SPEED + vy = obs[0, 4] * MAX_SPEED + vz = obs[0, 5] * MAX_SPEED + return np.sqrt(vx**2 + vy**2 + vz**2) + + +def get_vz(obs): + """Get vertical velocity from observation (LEGACY - assumes WORLD_FRAME).""" + return obs[0, 5] * MAX_SPEED + + +def get_alt(obs): + """Get altitude from observation (LEGACY - assumes WORLD_FRAME).""" + return obs[0, 2] * WORLD_MAX_Z + + +def level_flight_pitch(obs, kp=LEVEL_FLIGHT_KP, kd=LEVEL_FLIGHT_KD): + """ + PD autopilot for level flight (vz = 0). LEGACY - assumes WORLD_FRAME. + Uses tuned PID values from pid_sweep.py for stable flight. + """ + vz = get_vz(obs) + # Negative because: if climbing (vz>0), need nose down (negative elevator) + elevator = -kp * vz - kd * vz + return np.clip(elevator, -0.2, 0.2) + + +def test_max_speed(): + """ + Full throttle level flight starting near max speed. + Should stabilize around 159 m/s (P-51D Military power). + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at 150 m/s (near expected max), center of world, flying +X + env.force_state( + player_pos=(-1000, 0, 1000), + player_vel=(150, 0, 0), + player_throttle=1.0, + ) + + obs = env.observations + prev_speed = get_speed(obs) + stable_count = 0 + + for step in range(1500): # 30 seconds + elevator = level_flight_pitch(obs) + action = np.array([[1.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + obs, _, term, _, _ = env.step(action) + + if term[0]: + print(" (terminated - hit bounds)") + break + + speed = get_speed(obs) + if abs(speed - prev_speed) < 0.05: + stable_count += 1 + if stable_count > 100: + break + else: + stable_count = 0 + prev_speed = speed + + final_speed = get_speed(obs) + RESULTS['max_speed'] = final_speed + diff = final_speed - P51D_MAX_SPEED + status = "OK" if abs(diff) < 15 else "CHECK" + print(f"max_speed: {final_speed:6.1f} m/s (P-51D: {P51D_MAX_SPEED:.0f}, diff: {diff:+.1f}) [{status}]") + + +def test_acceleration(): + """ + Full throttle starting at 100 m/s - verify plane accelerates. + Should see speed increase toward max speed (~150 m/s). 
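(Rough cross-check, a sketch only, with constants copied from flightlib.h: the initial acceleration at 100 m/s follows from excess thrust, a = (T - D) / m. The test's pass threshold remains the authority; this only shows the order of magnitude.)

import numpy as np
m, g, rho, S = 4082.0, 9.81, 1.225, 21.65
Cd0, K = 0.0163, 0.072
P, eta = 1_112_000.0, 0.80

V = 100.0
q = 0.5 * rho * V**2
Cl = m * g / (q * S)              # lift coefficient for level flight, ~0.30
Cd = Cd0 + K * Cl**2              # drag polar
D = Cd * q * S                    # ~3,000 N
T = min(0.3 * P, P * eta / V)     # power-limited propeller thrust, ~8,900 N
a = (T - D) / m
print(f"Cl={Cl:.2f}, D={D:.0f} N, T={T:.0f} N, a={a:.2f} m/s^2")   # a ~ 1.4 m/s^2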
+ """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at 100 m/s (well below max speed) + env.force_state( + player_pos=(-1000, 0, 1000), + player_vel=(100, 0, 0), + player_throttle=1.0, + ) + + initial_speed = get_speed_from_state(env) + speeds = [initial_speed] + + for step in range(500): # 10 seconds + elevator = level_flight_pitch_from_state(env) + action = np.array([[1.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + if term[0]: + print(" (terminated - hit bounds)") + break + + speed = get_speed_from_state(env) + speeds.append(speed) + + final_speed = speeds[-1] + speed_gain = final_speed - initial_speed + RESULTS['acceleration'] = speed_gain + + # Should gain at least 20 m/s in 10 seconds + status = "OK" if speed_gain > 20 else "CHECK" + print(f"acceleration: {initial_speed:.0f} -> {final_speed:.0f} m/s (gained {speed_gain:+.1f} m/s) [{status}]") + + +def test_deceleration(): + """ + Zero throttle starting at 150 m/s - verify plane decelerates due to drag. + Should see speed decrease as drag slows the plane. + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at 150 m/s with zero throttle + env.force_state( + player_pos=(-1000, 0, 1000), + player_vel=(150, 0, 0), + player_throttle=0.0, + ) + + initial_speed = get_speed_from_state(env) + speeds = [initial_speed] + + for step in range(500): # 10 seconds + elevator = level_flight_pitch_from_state(env) + # Zero throttle (action[0] = -1 maps to 0% throttle) + action = np.array([[-1.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + if term[0]: + print(" (terminated - hit bounds)") + break + + speed = get_speed_from_state(env) + speeds.append(speed) + + final_speed = speeds[-1] + speed_loss = initial_speed - final_speed + RESULTS['deceleration'] = speed_loss + + # Should lose at least 20 m/s in 10 seconds due to drag + status = "OK" if speed_loss > 20 else "CHECK" + print(f"deceleration: {initial_speed:.0f} -> {final_speed:.0f} m/s (lost {speed_loss:+.1f} m/s) [{status}]") + + +def test_cruise_speed(): + """50% throttle level flight - cruise speed.""" + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at moderate speed + env.force_state( + player_pos=(-1000, 0, 1000), + player_vel=(120, 0, 0), + player_throttle=0.5, + ) + + obs = env.observations + prev_speed = get_speed(obs) + stable_count = 0 + + for step in range(1500): + elevator = level_flight_pitch(obs) + action = np.array([[0.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) # 50% throttle + obs, _, term, _, _ = env.step(action) + + if term[0]: + break + + speed = get_speed(obs) + if abs(speed - prev_speed) < 0.05: + stable_count += 1 + if stable_count > 100: + break + else: + stable_count = 0 + prev_speed = speed + + final_speed = get_speed(obs) + RESULTS['cruise_speed'] = final_speed + print(f"cruise_speed: {final_speed:6.1f} m/s (50% throttle)") + + +def test_stall_speed(): + """ + Find stall speed by testing level flight at decreasing speeds. + + At each speed, set the exact pitch angle needed for level flight, + then verify the physics can maintain altitude. Stall occurs when + required C_L exceeds C_L_max. + + This bypasses autopilot limitations by setting pitch directly. 
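(For orientation, a sketch of the same arithmetic at one probe speed; constants mirror flightlib.h. It shows the level-flight C_L and body pitch the test solves for, plus the closed-form stall speed.)

import numpy as np
W, rho, S = 4082.0 * 9.81, 1.225, 21.65
Cl_max, Cl_alpha = 1.48, 5.56
alpha_zero, wing_inc = -0.021, 0.026

V_stall = np.sqrt(2 * W / (rho * S * Cl_max))              # ~45 m/s, the P-51D target
V = 50.0
Cl_needed = W / (0.5 * rho * V**2 * S)                     # ~1.21, still below Cl_max
alpha_needed = Cl_needed / Cl_alpha - wing_inc + alpha_zero
print(f"V_stall={V_stall:.1f} m/s, Cl@{V:.0f}={Cl_needed:.2f}, "
      f"alpha={np.degrees(alpha_needed):.1f} deg")         # ~9.8 deg body pitch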
+ """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + + # Physics constants (must match flightlib.h) + W = 4082 * 9.81 # Weight (N) + rho = 1.225 # Air density + S = 21.65 # Wing area + C_L_max = 1.48 # Max lift coefficient + C_L_alpha = 5.56 # Lift curve slope + alpha_zero = -0.021 # Zero-lift angle (rad) + wing_inc = 0.026 # Wing incidence (rad) + + # Theoretical stall speed + V_stall_theory = np.sqrt(2 * W / (rho * S * C_L_max)) + + # Test speeds from high to low + stall_speed = None + last_flyable = None + + for V in range(70, 35, -5): + env.reset() + + # C_L needed for level flight at this speed + q_dyn = 0.5 * rho * V * V + C_L_needed = W / (q_dyn * S) + + # Check if within aerodynamic limits + if C_L_needed > C_L_max: + # Can't fly level - this is stall + stall_speed = V + break + + # Calculate pitch angle needed for this C_L + # C_L = C_L_alpha * (alpha + wing_inc - alpha_zero) + alpha_needed = C_L_needed / C_L_alpha - wing_inc + alpha_zero + + # Create pitch-up quaternion (rotation about Y axis) + # Negative angle because positive Y rotation = nose DOWN (right-hand rule) + pitch_rad = alpha_needed + ori_w = np.cos(-pitch_rad / 2) + ori_y = np.sin(-pitch_rad / 2) + + # Set up plane at exact pitch for level flight + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(V, 0, 0), + player_ori=(ori_w, 0, ori_y, 0), + player_throttle=0.0, # Zero throttle - just testing lift + ) + + # Run for 2 seconds with zero controls, measure vz + obs = env.observations + vzs = [] + for _ in range(100): # 2 seconds + vz = get_vz(obs) + vzs.append(vz) + action = np.array([[-1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + obs, _, term, _, _ = env.step(action) + if term[0]: + break + + avg_vz = np.mean(vzs[-50:]) if len(vzs) >= 50 else np.mean(vzs) + + # If maintaining altitude (vz near 0 or positive), plane can fly + if avg_vz >= -5: # Allow small sink rate + last_flyable = V + + # Stall speed is between last_flyable and the speed where C_L > C_L_max + if stall_speed is None: + stall_speed = 35 # Below our test range + elif last_flyable is not None: + # Interpolate: stall is where we transition from flyable to not + stall_speed = last_flyable + + RESULTS['stall_speed'] = stall_speed + diff = stall_speed - P51D_STALL_SPEED + status = "OK" if abs(diff) < 10 else "CHECK" + print(f"stall_speed: {stall_speed:6.1f} m/s (P-51D: {P51D_STALL_SPEED:.0f}, diff: {diff:+.1f}, theory: {V_stall_theory:.0f}) [{status}]") + + +def test_climb_rate(): + """ + Measure climb rate at Vy (best climb speed) with optimal pitch. + + Sets up plane at Vy with the pitch angle calculated for steady climb, + then measures actual climb rate. This tests that physics produces + correct excess thrust at climb speed. + + Approach: Calculate pitch for expected P-51D climb (15.4 m/s at 74 m/s), + set that state with force_state(), run with zero elevator (pitch holds), + and verify physics produces the expected climb rate. 
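(Back-of-the-envelope check, a sketch with constants copied from flightlib.h: steady climb rate can also be estimated from specific excess power, ROC ~ (T - D) * V / W, which comes out in the same ballpark as the 15.4 m/s target at Vy = 74 m/s.)

import numpy as np
W, rho, S = 4082.0 * 9.81, 1.225, 21.65
Cd0, K = 0.0163, 0.072
P, eta = 1_112_000.0, 0.80

Vy = 74.0
gamma = np.arcsin(15.4 / Vy)                  # ~12 deg climb angle
q = 0.5 * rho * Vy**2
Cl = W * np.cos(gamma) / (q * S)              # ~0.54
D = (Cd0 + K * Cl**2) * q * S                 # ~2,700 N
T = min(0.3 * P, P * eta / Vy)                # ~12,000 N
roc = (T - D) * Vy / W                        # ~17 m/s from excess power
print(f"gamma={np.degrees(gamma):.1f} deg, Cl={Cl:.2f}, ROC~{roc:.1f} m/s")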
+ """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + + # Physics constants (must match flightlib.h) + W = 4082 * 9.81 # Weight (N) + rho = 1.225 # Air density + S = 21.65 # Wing area + C_L_alpha = 5.56 # Lift curve slope + alpha_zero = -0.021 # Zero-lift angle (rad) + wing_inc = 0.026 # Wing incidence (rad) + + Vy = 74.0 # Best climb speed (m/s) + + # Calculate climb geometry for P-51D expected performance + expected_ROC = P51D_CLIMB_RATE # 15.4 m/s + gamma = np.arcsin(expected_ROC / Vy) # Climb angle ~12° + + # In steady climb: L = W * cos(gamma) + L_needed = W * np.cos(gamma) + q_dyn = 0.5 * rho * Vy * Vy + C_L = L_needed / (q_dyn * S) + + # Calculate AOA needed for this lift + alpha = C_L / C_L_alpha - wing_inc + alpha_zero + + # Body pitch = AOA + climb angle (nose above horizon) + pitch = alpha + gamma + + # Create pitch-up quaternion (negative angle because positive Y rotation = nose DOWN) + ori_w = np.cos(-pitch / 2) + ori_y = np.sin(-pitch / 2) + + # Set up plane in steady climb: velocity vector along climb path + vx = Vy * np.cos(gamma) + vz = Vy * np.sin(gamma) # This IS the expected climb rate + + env.reset() + env.force_state( + player_pos=(0, 0, 500), + player_vel=(vx, 0, vz), # Velocity along climb path + player_ori=(ori_w, 0, ori_y, 0), # Pitch for steady climb + player_throttle=1.0, + ) + + # Run with zero elevator (pitch holds constant) and measure vz + obs = env.observations + vzs = [] + speeds = [] + + for step in range(1000): # 20 seconds + vz_obs = get_vz(obs) + speed = get_speed(obs) + + # Skip first 5 seconds for settling, then collect data + if step >= 250: + vzs.append(vz_obs) + speeds.append(speed) + + # Zero elevator - pitch angle holds due to rate-based controls + action = np.array([[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + obs, _, term, _, _ = env.step(action) + if term[0]: + break + + avg_vz = np.mean(vzs) if vzs else 0 + avg_speed = np.mean(speeds) if speeds else 0 + + RESULTS['climb_rate'] = avg_vz + diff = avg_vz - P51D_CLIMB_RATE + status = "OK" if abs(diff) < 5 else "CHECK" + print(f"climb_rate: {avg_vz:6.1f} m/s (P-51D: {P51D_CLIMB_RATE:.0f}, diff: {diff:+.1f}, speed: {avg_speed:.0f}/{Vy:.0f}) [{status}]") + + +def test_glide_ratio(): + """ + Power-off glide test - validates drag polar (Cd = Cd0 + K*Cl^2). + + At best glide speed, L/D is maximized. This occurs when induced drag + equals parasitic drag (Cd0 = K*Cl^2). 
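(Quick numerical check of that condition, a sketch using the flightlib.h constants: at Cl_opt the induced term equals Cd0, which is where the L/D figure quoted below comes from.)

import numpy as np
Cd0, K = 0.0163, 0.072
Cl_opt = np.sqrt(Cd0 / K)              # ~0.476
Cd_induced = K * Cl_opt**2             # equals Cd0 by construction
LD_max = Cl_opt / (Cd0 + Cd_induced)   # ~14.6
print(f"Cl_opt={Cl_opt:.3f}, Cd_induced={Cd_induced:.4f} (=Cd0), L/D_max={LD_max:.1f}")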
+ + From our drag polar: + Cl_opt = sqrt(Cd0/K) = sqrt(0.0163/0.072) = 0.476 + Cd_opt = 2*Cd0 = 0.0326 + L/D_max = Cl_opt/Cd_opt = 14.6 + + Best glide speed: V = sqrt(2W/(rho*S*Cl)) = 80 m/s + Glide angle: γ = arctan(1/L/D) = 3.9° + Expected sink rate: V * sin(γ) = V/(L/D) = 5.5 m/s + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + + # Calculate theoretical values from drag polar + Cd0 = 0.0163 + K = 0.072 + W = 4082 * 9.81 + rho = 1.225 + S = 21.65 + C_L_alpha = 5.56 + alpha_zero = -0.021 + wing_inc = 0.026 + + Cl_opt = np.sqrt(Cd0 / K) # 0.476 + Cd_opt = 2 * Cd0 # 0.0326 + LD_max = Cl_opt / Cd_opt # 14.6 + + # Best glide speed + V_glide = np.sqrt(2 * W / (rho * S * Cl_opt)) # ~80 m/s + + # Glide angle (nose below horizon for descent) + gamma = np.arctan(1 / LD_max) # ~3.9° = 0.068 rad + + # Expected sink rate + sink_expected = V_glide * np.sin(gamma) # ~5.5 m/s + + # AOA needed for Cl_opt + alpha = Cl_opt / C_L_alpha - wing_inc + alpha_zero # ~0.04 rad + + # In steady glide: body pitch = alpha - gamma (nose below velocity) + # But our velocity is along glide path, so body pitch relative to horizontal = alpha - gamma + # For quaternion: we want nose tilted down from horizontal + pitch = alpha - gamma # Negative = nose down + + # Create quaternion for glide attitude (negative because positive Y rotation = nose down) + ori_w = np.cos(-pitch / 2) + ori_y = np.sin(-pitch / 2) + + # Velocity along glide path (descending) + vx = V_glide * np.cos(gamma) + vz = -V_glide * np.sin(gamma) # Negative = descending + + env.reset() + env.force_state( + player_pos=(0, 0, 2000), # High altitude for long glide + player_vel=(vx, 0, vz), + player_ori=(ori_w, 0, ori_y, 0), + player_throttle=0.0, + ) + + # Run with zero controls - let physics maintain steady glide + obs = env.observations + vzs = [] + speeds = [] + + for step in range(500): # 10 seconds + vz_obs = get_vz(obs) + speed = get_speed(obs) + + # Collect data after 2 seconds of settling + if step >= 100: + vzs.append(vz_obs) + speeds.append(speed) + + # Zero controls - pitch angle holds due to rate-based system + action = np.array([[-1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + obs, _, term, _, _ = env.step(action) + if term[0]: + break + + avg_vz = np.mean(vzs) if vzs else 0 # Should be negative (descending) + avg_sink = -avg_vz # Convert to positive sink rate + avg_speed = np.mean(speeds) if speeds else 0 + measured_LD = avg_speed / avg_sink if avg_sink > 0.1 else 0 + + RESULTS['glide_sink'] = avg_sink + RESULTS['glide_LD'] = measured_LD + + diff = avg_sink - sink_expected + status = "OK" if abs(diff) < 2 else "CHECK" + print(f"glide_ratio: L/D={measured_LD:4.1f} (theory: {LD_max:.1f}, sink: {avg_sink:.1f} m/s, expected: {sink_expected:.1f}) [{status}]") + + +def test_sustained_turn(): + """ + Sustained turn test - verifies banked flight produces a turn. + + Tests that at 30° bank, 100 m/s: + - Plane turns (heading changes) + - Turn rate is positive and consistent + - Altitude loss is bounded + + Note: The physics model produces ~2-3°/s at 30° bank (ideal theory: 3.2°/s). + This is acceptable for RL training - the physics is consistent. 
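(For reference, the ideal figures quoted above come from the coordinated-turn relation omega = g * tan(bank) / V; a sketch, not part of the test.)

import numpy as np
g, V, bank = 9.81, 100.0, np.radians(30.0)
omega = g * np.tan(bank) / V            # rad/s
radius = V**2 / (g * np.tan(bank))      # m
print(f"turn rate={np.degrees(omega):.1f} deg/s, radius={radius:.0f} m")   # ~3.2 deg/s, ~1770 m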
+ """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + + # Test parameters - 30° bank is gentle and stable + V = 100.0 # m/s + bank_deg = 30.0 # degrees + bank = np.radians(bank_deg) + + # Build quaternion: small pitch up, then bank right + alpha = np.radians(3) # Small fixed pitch for lift + + # Pitch (negative = nose up) + qp_w = np.cos(-alpha / 2) + qp_y = np.sin(-alpha / 2) + + # Roll (negative = bank right due to quaternion convention) + qr_w = np.cos(-bank / 2) + qr_x = np.sin(-bank / 2) + + # Combined: q = qr * qp + ori_w = qr_w * qp_w + ori_x = qr_x * qp_w + ori_y = qr_w * qp_y + ori_z = qr_x * qp_y + + env.reset() + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(V, 0, 0), + player_ori=(ori_w, ori_x, ori_y, ori_z), + player_throttle=1.0, + ) + + # Run with zero controls + headings = [] + speeds = [] + alts = [] + + for step in range(250): # 5 seconds + state = env.get_state() + vx, vy = state['vx'], state['vy'] + heading = np.arctan2(vy, vx) + speed = np.sqrt(vx**2 + vy**2 + state['vz']**2) + alt = state['pz'] + + if step >= 50: # After 1 second settling + headings.append(heading) + speeds.append(speed) + alts.append(alt) + + action = np.array([[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + # Calculate turn rate + if len(headings) > 50: + headings = np.unwrap(headings) + heading_change = headings[-1] - headings[0] + time_elapsed = len(headings) * 0.02 + turn_rate_actual = np.degrees(heading_change / time_elapsed) + else: + turn_rate_actual = 0 + + avg_speed = np.mean(speeds) if speeds else 0 + alt_change = alts[-1] - alts[0] if len(alts) > 1 else 0 + + RESULTS['turn_rate'] = abs(turn_rate_actual) + + # Check: positive turn rate (plane is turning), not diving catastrophically + is_turning = abs(turn_rate_actual) > 1.0 + alt_ok = alt_change > -200 # Less than 200m loss in 5 seconds + status = "OK" if (is_turning and alt_ok) else "CHECK" + + print(f"turn_rate: {abs(turn_rate_actual):5.1f}°/s ({bank_deg:.0f}° bank, speed: {avg_speed:.0f}, Δalt: {alt_change:+.0f}m) [{status}]") + + +def test_turn_60(): + """ + Coordinated turn at 60° bank with PID control. 
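(Side note, a sketch of why 60 deg is a meaningful step up from 30 deg: in a level coordinated turn the load factor is n = 1/cos(bank), and stall speed grows with sqrt(n), so the 100 m/s test speed keeps a comfortable margin at 2 g.)

import numpy as np
bank = np.radians(60.0)
n = 1.0 / np.cos(bank)                    # load factor, 2.0 at 60 deg
V_stall_1g = 45.0                         # clean 1 g stall speed (m/s)
V_stall_turn = V_stall_1g * np.sqrt(n)    # ~64 m/s at 2 g
print(f"n={n:.1f} g, stall speed in the turn ~{V_stall_turn:.0f} m/s")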
+ + P-51D reference: 60° bank (2.0g) at 350 mph gives 5°/s + At 100 m/s: theory = g*tan(60°)/V = 9.81*1.732/100 = 9.7°/s + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + + bank_deg = 60.0 + bank_target = np.radians(bank_deg) + V = 100.0 + + # Right bank quaternion + ori_w = np.cos(bank_target / 2) + ori_x = -np.sin(bank_target / 2) + + env.reset() + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(V, 0, 0), + player_ori=(ori_w, ori_x, 0.0, 0.0), + player_throttle=1.0, + ) + + # PID gains (found via sweep in debug_turn.py) + elev_kp, elev_kd = -0.05, 0.005 + roll_kp, roll_kd = -2.0, -0.1 + + prev_vz = 0.0 + prev_bank_error = 0.0 + + headings, alts, banks = [], [], [] + + for step in range(250): # 5 seconds + # Get state from raw state (independent of obs_scheme) + state = env.get_state() + vz = state['vz'] + alt = state['pz'] + vx, vy = state['vx'], state['vy'] + heading = np.arctan2(vy, vx) + up_y, up_z = state['up_y'], state['up_z'] + bank_actual = np.arccos(np.clip(up_z, -1, 1)) + if up_y < 0: + bank_actual = -bank_actual + + # Elevator PID + vz_error = -vz + vz_deriv = (vz - prev_vz) / 0.02 + elevator = elev_kp * vz_error + elev_kd * vz_deriv + elevator = np.clip(elevator, -1.0, 1.0) + prev_vz = vz + + # Aileron PID + bank_error = bank_target - bank_actual + bank_deriv = (bank_error - prev_bank_error) / 0.02 + aileron = roll_kp * bank_error + roll_kd * bank_deriv + aileron = np.clip(aileron, -1.0, 1.0) + prev_bank_error = bank_error + + if step >= 25: + headings.append(heading) + alts.append(alt) + banks.append(np.degrees(bank_actual)) + + action = np.array([[1.0, elevator, aileron, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + # Calculate results + headings = np.unwrap(headings) + turn_rate = np.degrees((headings[-1] - headings[0]) / (len(headings) * 0.02)) + alt_change = alts[-1] - alts[0] + bank_mean = np.mean(banks) + theory_rate = np.degrees(9.81 * np.tan(bank_target) / V) + eff = 100 * turn_rate / theory_rate + + RESULTS['turn_rate_60'] = turn_rate + + status = "OK" if (85 < eff < 105 and abs(alt_change) < 50) else "CHECK" + print(f"turn_60: {turn_rate:5.1f}°/s (theory: {theory_rate:.1f}, eff: {eff:.0f}%, bank: {bank_mean:.0f}°, Δalt: {alt_change:+.0f}m) [{status}]") + + +def test_pitch_direction(): + """Verify positive elevator = nose DOWN (standard joystick: push forward).""" + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + env.force_state(player_vel=(80, 0, 0)) + + # Get initial forward vector Z component (nose pointing direction) + initial_fwd_z = env.get_state()['fwd_z'] + + # Apply positive elevator (+1.0 = push forward) + action = np.array([[0.5, 1.0, 0.0, 0.0, 0.0]], dtype=np.float32) + for step in range(50): + env.step(action) + + # Check if nose went DOWN (fwd_z should decrease) + final_fwd_z = env.get_state()['fwd_z'] + nose_down = final_fwd_z < initial_fwd_z # fwd_z decreases when nose pitches down + + RESULTS['pitch_direction'] = 'DOWN' if nose_down else 'UP' + status = 'OK' if nose_down else 'WRONG' + print(f"pitch_dir: {RESULTS['pitch_direction']:>6} (+elev = nose DOWN) [{status}]") + + +def test_roll_direction(): + """Verify positive ailerons = roll right.""" + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + env.force_state(player_vel=(80, 0, 0)) + + action = np.array([[0.5, 0.0, 1.0, 0.0, 0.0]], dtype=np.float32) + for _ in range(50): + env.step(action) + state = env.get_state() + 
up_y_changed = abs(state['up_y']) > 0.1 + RESULTS['roll_works'] = 'YES' if up_y_changed else 'NO' + status = 'OK' if up_y_changed else 'WRONG' + print(f"roll_works: {RESULTS['roll_works']:>6} (should be YES) [{status}]") + + +def test_rudder_only_turn(): + """ + Test: Wings level, nose on horizon, full rudder - measure yaw rate. + + P-51D rudder-only turns should achieve ~5-15 deg/s max yaw rate. + Current physics (MAX_YAW_RATE=1.5 rad/s) achieves ~86 deg/s which is unrealistic. + + This test uses PID control to: + - Hold wings level (ailerons fight any roll) + - Hold nose on horizon (elevator maintains level flight) + - Apply full rudder and measure resulting yaw rate + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at cruise speed, wings level + V = 120.0 # m/s cruise + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(V, 0, 0), + player_ori=(1.0, 0.0, 0.0, 0.0), # Identity = wings level, heading +X + player_throttle=1.0, + ) + + # PID gains for wings level + roll_kp = 2.0 # Proportional + roll_kd = 0.1 # Derivative damping + + # PID gains for level flight (from existing tests) + elev_kp = 0.001 + elev_kd = 0.001 + + prev_roll = 0.0 + prev_vz = 0.0 + + headings = [] + + for step in range(300): # 6 seconds at 50Hz + # Extract state from raw state (independent of obs_scheme) + state = env.get_state() + vx, vy, vz = state['vx'], state['vy'], state['vz'] + up_y, up_z = state['up_y'], state['up_z'] + + # Calculate heading from velocity + heading = np.arctan2(vy, vx) + headings.append(heading) + + # Calculate roll angle from up vector + roll = np.arctan2(up_y, up_z) + + # Wings level PID: drive roll to zero + roll_error = 0.0 - roll + roll_deriv = (roll - prev_roll) / 0.02 + aileron = roll_kp * roll_error - roll_kd * roll_deriv + aileron = np.clip(aileron, -1.0, 1.0) + prev_roll = roll + + # Level flight PID: drive vz to zero + vz_error = 0.0 - vz + vz_deriv = (vz - prev_vz) / 0.02 + elevator = -elev_kp * vz_error - elev_kd * vz_deriv + elevator = np.clip(elevator, -0.3, 0.3) + prev_vz = vz + + # FULL RUDDER + rudder = 1.0 + + # Action: [throttle, elevator, aileron, rudder, trigger] + action = np.array([[1.0, elevator, aileron, rudder, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + + if term[0]: + break + + # Calculate yaw rate + headings = np.unwrap(headings) # Handle wraparound + if len(headings) > 100: + # Use last portion for steady-state + heading_change = headings[-1] - headings[100] + time_elapsed = (len(headings) - 100) * 0.02 + yaw_rate_deg_s = np.degrees(heading_change / time_elapsed) + else: + yaw_rate_deg_s = 0 + + RESULTS['rudder_yaw_rate'] = yaw_rate_deg_s + + # Realistic bounds: 5-15 deg/s for P-51D rudder-only + # Current unrealistic: ~86 deg/s (with MAX_YAW_RATE=1.5) + is_realistic = 5.0 < abs(yaw_rate_deg_s) < 20.0 + status = "OK" if is_realistic else "FAIL" + + print(f"rudder_only: {yaw_rate_deg_s:5.1f}°/s (target: 5-15°/s) [{status}]") + + +def test_knife_edge_pull(): + """ + Knife-edge pull test - validates that elevator becomes YAW when rolled 90°. 
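(A quaternion sketch of the same claim, with the flightlib.h conventions ported to Python by hand for this note only: after a 90 deg right roll, the body pitch axis (body Y) points along world -Z, so a pitch command shows up as world-frame yaw.)

import numpy as np

def quat_mul(a, b):
    aw, ax, ay, az = a
    bw, bx, by, bz = b
    return (aw*bw - ax*bx - ay*by - az*bz,
            aw*bx + ax*bw + ay*bz - az*by,
            aw*by - ax*bz + ay*bw + az*bx,
            aw*bz + ax*by - ay*bx + az*bw)

def quat_rotate(q, v):
    qv = (0.0, v[0], v[1], v[2])
    qc = (q[0], -q[1], -q[2], -q[3])
    return quat_mul(quat_mul(q, qv), qc)[1:]

half = np.radians(90.0) / 2
q_roll_right = (np.cos(half), -np.sin(half), 0.0, 0.0)      # 90 deg right roll
body_y_world = quat_rotate(q_roll_right, (0.0, 1.0, 0.0))
print(body_y_world)   # ~(0, 0, -1): pitch axis is vertical, so pitch input yaws the nose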
+ + Physics explanation: + - Plane rolled 90° right: right wing DOWN, canopy facing RIGHT + - Body axes after roll: + - Body X (nose): +X world (forward) + - Body Y (right wing): -Z world (DOWN) + - Body Z (canopy): +Y world (RIGHT) + - Negative elevator (pull back) = pitch up in BODY frame = rotation about body Y + - Body Y is now -Z world, so this is rotation about world -Z + - Right-hand rule: thumb on -Z, fingers curl +X toward -Y + - Result: Nose yaws LEFT in world frame (since we pull back = negative elevator) + + Expected behavior: + 1. Heading changes significantly (plane turns left with pull back) + 2. Altitude drops (lift is horizontal, not vertical) + 3. Up vector stays roughly horizontal (still in knife-edge) + 4. This is essentially a "flat turn" using elevator + + This tests that the quaternion kinematics correctly transform body-frame + rotations to world-frame effects. + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at high speed to avoid stall during the pull + V = 150.0 # m/s - well above stall speed even at high AoA + + # Use EXACT 90° right roll via force_state for precise test + # Roll -90° about X axis: q = (cos(45°), -sin(45°), 0, 0) + roll_90 = np.radians(90) + qw = np.cos(roll_90 / 2) + qx = -np.sin(roll_90 / 2) # Negative for right roll + + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(V, 0, 0), # Flying +X + player_ori=(qw, qx, 0.0, 0.0), # EXACT 90° right roll + player_throttle=1.0, + ) + + # Verify knife-edge achieved + state = env.get_state() + up_x, up_y, up_z = state['up_x'], state['up_y'], state['up_z'] + + # Record initial state + alt_start = state['pz'] + vx_start, vy_start = state['vx'], state['vy'] + heading_start = np.arctan2(vy_start, vx_start) + + # --- Phase 2: Full elevator pull in knife-edge --- + headings = [] + alts = [] + up_zs = [] + + for step in range(100): # 2 seconds + state = env.get_state() + vx, vy, vz = state['vx'], state['vy'], state['vz'] + heading = np.arctan2(vy, vx) + alt = state['pz'] + up_z_now = state['up_z'] + + headings.append(heading) + alts.append(alt) + up_zs.append(up_z_now) + + # Full throttle, FULL ELEVATOR PULL, no aileron, no rudder + # Convention: -elevator = pull back = nose up + action = np.array([[1.0, -1.0, 0.0, 0.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + # --- Analysis --- + headings = np.unwrap(headings) + heading_change = np.degrees(headings[-1] - headings[0]) + alt_loss = alt_start - alts[-1] + avg_up_z = np.mean(up_zs) + time_elapsed = len(headings) * 0.02 + + # Calculate turn rate + turn_rate = heading_change / time_elapsed if time_elapsed > 0 else 0 + + RESULTS['knife_pull_turn'] = turn_rate + RESULTS['knife_pull_alt_loss'] = alt_loss + + # Expected: + # 1. Significant heading change (turns left with pull back, so negative) + # 2. Altitude loss (no vertical lift) + # 3. 
Up vector stays near horizontal (|up_z| small) + + # In our coordinate system: X forward, Y left, Z up + # atan2(vy, vx) increases when turning left (positive vy) + heading_ok = heading_change > 20 # Should turn at least 20° left in 2 seconds + alt_ok = alt_loss > 5 # Should lose altitude + roll_maintained = abs(avg_up_z) < 0.3 # Up vector stays roughly horizontal + + all_ok = heading_ok and alt_ok and roll_maintained + status = "OK" if all_ok else "CHECK" + + # Positive heading change = LEFT turn (Y is left in our coords) + direction = "LEFT" if heading_change > 0 else "RIGHT" + print(f"knife_pull: turn={turn_rate:+.1f}°/s ({direction}), alt_lost={alt_loss:.0f}m, |up_z|={abs(avg_up_z):.2f} [{status}]") + + if not heading_ok: + print(f" WARNING: Expected significant left turn, got {heading_change:.1f}° heading change") + if not alt_ok: + print(f" WARNING: Expected altitude loss, got {alt_loss:.1f}m") + if not roll_maintained: + print(f" WARNING: Roll not maintained, up_z={avg_up_z:.2f} (should be near 0)") + + +def test_knife_edge_flight(): + """ + Knife-edge flight test - validates that the plane CANNOT maintain altitude. + + In knife-edge flight (90° roll), the wings are vertical and generate + NO vertical lift. The plane must rely on: + 1. Fuselage side area (very inefficient, NOT modeled) + 2. Rudder sideforce (NOT modeled - rudder only creates yaw rate) + 3. Thrust vector (only if nosed up significantly) + + A P-51D is NOT designed for knife-edge - streamlined fuselage = poor side area. + Even purpose-built aerobatic planes struggle to maintain altitude in true knife-edge. + + Expected behavior: Plane should lose altitude rapidly (~9 m/s sink or more). + The nose may yaw from rudder input, but vertical force is insufficient. + + Sources: + - https://www.thenakedscientists.com/articles/questions/what-produces-lift-during-knife-edge-pass + - https://www.aopa.org/news-and-media/all-news/1998/august/flight-training-magazine/form-and-function + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at cruise speed, wings level, flying +X + V = 120.0 # m/s - fast enough for good control authority + env.force_state( + player_pos=(0, 0, 1500), # High altitude for test duration + player_vel=(V, 0, 0), # Flying +X direction + player_ori=(1.0, 0.0, 0.0, 0.0), # Wings level + player_throttle=1.0, + ) + + # --- Phase 1: Roll to knife-edge (90° right) --- + # Takes about 30 steps at MAX_ROLL_RATE=3.0 rad/s (0.5s to roll 90°) + for step in range(30): + # Full right aileron to roll 90° + action = np.array([[1.0, 0.0, 1.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + # Verify we're in knife-edge (up vector should be pointing +Y or -Y) + state = env.get_state() + up_y, up_z = state['up_y'], state['up_z'] + roll_deg = np.degrees(np.arccos(np.clip(up_z, -1, 1))) + + # Record altitude at start of knife-edge + alt_start = state['pz'] + + if abs(roll_deg - 90) > 15: + print(f"knife_edge: [SKIP] Failed to roll to 90° (got {roll_deg:.0f}°)") + return + + # --- Phase 2: Knife-edge with full top rudder --- + # Right wing is down (up_y < 0 means rolled right) + # "Top rudder" = left rudder = yaw left in body frame = nose up in knife-edge body frame + # But in world frame, this tries to yaw the nose sideways, not up + + alts = [] + vzs = [] + + for step in range(150): # 3 seconds at 50Hz + state = env.get_state() + alt = state['pz'] + vz = state['vz'] + alts.append(alt) + vzs.append(vz) + + # Full throttle, no elevator, no aileron (hold knife-edge), FULL 
LEFT RUDDER + # Left rudder = positive rudder = yaw left in body frame + # In knife-edge (rolled 90° right), body-left is world-up + # So this SHOULD help keep nose up... if rudder created sideforce + action = np.array([[1.0, 0.0, 0.0, 1.0, 0.0]], dtype=np.float32) + _, _, term, _, _ = env.step(action) + if term[0]: + break + + alt_end = alts[-1] if alts else alt_start + alt_loss = alt_start - alt_end + avg_vz = np.mean(vzs) if vzs else 0 + time_elapsed = len(alts) * 0.02 # seconds + + # Calculate sink rate + sink_rate = alt_loss / time_elapsed if time_elapsed > 0 else 0 + + RESULTS['knife_edge_sink'] = sink_rate + RESULTS['knife_edge_alt_loss'] = alt_loss + + # Expected: significant altitude loss + # At 1g downward acceleration: v = g*t = 9.81 * 3 = 29 m/s after 3s + # Distance = 0.5 * g * t^2 = 0.5 * 9.81 * 9 = 44 m (free fall) + # With some lift from thrust vector angle, maybe 20-30m loss + # If plane CAN maintain altitude (loss < 5m), physics is WRONG + + is_realistic = alt_loss > 10 # Should lose at least 10m in 3 seconds + status = "OK" if is_realistic else "FAIL - physics allows impossible knife-edge!" + + print(f"knife_edge: sink={sink_rate:5.1f} m/s, alt_lost={alt_loss:.0f}m in {time_elapsed:.1f}s [{status}]") + + if not is_realistic: + print(f" WARNING: P-51D should NOT maintain altitude in knife-edge!") + print(f" Wings are vertical = no lift. Rudder only creates yaw, not sideforce.") + print(f" Consider: Is thrust somehow pointing upward? Is there phantom lift?") + + +def test_mode_weights(): + """ + Test that mode_weights actually biases autopilot randomization. + + Sets 100% weight on AP_LEVEL, triggers multiple resets, + verifies that selected mode is always AP_LEVEL. + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Set AP_RANDOM mode and bias 100% toward LEVEL + env.set_autopilot(env_idx=0, mode=AutopilotMode.RANDOM) + env.set_mode_weights(level=1.0, turn_left=0.0, turn_right=0.0, climb=0.0, descend=0.0) + + # Trigger multiple resets and check mode each time + level_count = 0 + num_trials = 50 + + for _ in range(num_trials): + env.reset() + mode = env.get_autopilot_mode(env_idx=0) + if mode == AutopilotMode.LEVEL: + level_count += 1 + + pct = 100 * level_count / num_trials + RESULTS['mode_weights'] = pct + + # With 100% weight on LEVEL, should always get LEVEL + status = "OK" if pct == 100 else "CHECK" + print(f"mode_weights: {pct:5.1f}% (should be 100% AP_LEVEL) [{status}]") + + # Also test distribution with mixed weights + env.set_autopilot(env_idx=0, mode=AutopilotMode.RANDOM) # Re-enable randomization + env.set_mode_weights(level=0.5, turn_left=0.25, turn_right=0.25, climb=0.0, descend=0.0) + + counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0} # LEVEL, TURN_L, TURN_R, CLIMB, DESCEND + num_trials = 200 + + for _ in range(num_trials): + env.reset() + mode = env.get_autopilot_mode(env_idx=0) + if mode in counts: + counts[mode] += 1 + + # Check that LEVEL is most common (~50%) and CLIMB/DESCEND are rare (~0%) + level_pct = 100 * counts[1] / num_trials + climb_pct = 100 * counts[4] / num_trials + distribution_ok = level_pct > 35 and climb_pct < 10 + status2 = "OK" if distribution_ok else "CHECK" + print(f" distribution: LEVEL={level_pct:.0f}%, TURN_L={100*counts[2]/num_trials:.0f}%, TURN_R={100*counts[3]/num_trials:.0f}%, CLIMB={climb_pct:.0f}% [{status2}]") + + +# ============================================================================= +# G-FORCE TESTS - Validate G-loading physics +# 
============================================================================= + +def test_g_level_flight(): + """ + Level flight at cruise speed - verify G ≈ 1.0. + In steady level flight, lift equals weight, so G-loading should be ~1.0. + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at cruise speed, level + env.force_state( + player_pos=(0, 0, 1000), + player_vel=(120, 0, 0), + player_throttle=0.5, + ) + + g_values = [] + for step in range(200): # 4 seconds + elevator = level_flight_pitch_from_state(env) + action = np.array([[0.0, elevator, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + g = env.get_state()['g_force'] + g_values.append(g) + + if step % 25 == 0: + print(f" step {step:3d}: G = {g:.2f}") + + avg_g = np.mean(g_values[-100:]) # Last 2 seconds + RESULTS['g_level'] = avg_g + + status = "OK" if 0.8 < avg_g < 1.2 else "CHECK" + print(f"g_level: {avg_g:.2f} G (target: ~1.0) [{status}]") + + +def test_g_push_forward(): + """ + Push elevator forward - verify G decreases toward 0 and negative. + Reset to level flight for each test to avoid looping artifacts. + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + + print(" Pushing forward (positive elevator = nose down):") + min_g = float('inf') + + for elev in [0.0, 0.25, 0.5, 0.75, 1.0]: + # Reset to level flight for each elevator setting + env.reset() + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(150, 0, 0), + player_throttle=1.0, + ) + + # Run for 10 steps (0.2 sec) and track min G + test_min_g = float('inf') + for _ in range(10): + action = np.array([[1.0, elev, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + g = env.get_state()['g_force'] + test_min_g = min(test_min_g, g) + + min_g = min(min_g, test_min_g) + print(f" elevator={elev:+.2f}: min G = {test_min_g:+.2f}") + + RESULTS['g_push'] = min_g + + # Full push should give low/negative G + status = "OK" if min_g < 0.5 else "CHECK" + print(f"g_push: {min_g:+.2f} G (push should give < 0.5G) [{status}]") + + +def test_g_pull_back(): + """ + Pull elevator back - verify G increases above 1.0. + Reset to level flight for each test to avoid looping artifacts. + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + + print(" Pulling back (negative elevator = nose up):") + max_g = float('-inf') + + for elev in [0.0, -0.25, -0.5, -0.75, -1.0]: + # Reset to level flight for each elevator setting + env.reset() + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(150, 0, 0), # Higher speed for more G capability + player_throttle=1.0, + ) + + # Run for 10 steps (0.2 sec) and track max G + test_max_g = float('-inf') + for _ in range(10): + action = np.array([[1.0, elev, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + g = env.get_state()['g_force'] + test_max_g = max(test_max_g, g) + + max_g = max(max_g, test_max_g) + print(f" elevator={elev:+.2f}: max G = {test_max_g:+.2f}") + + RESULTS['g_pull'] = max_g + + # Full pull should give high G (at 150 m/s, should hit ~5-6G) + status = "OK" if max_g > 4.0 else "CHECK" + print(f"g_pull: {max_g:+.2f} G (pull should give > 4.0G) [{status}]") + + +def test_g_limit_negative(): + """ + Full forward stick - verify G never goes below -1.5G (G_LIMIT_NEG). + Physics should clamp acceleration to prevent exceeding this limit. 
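(Why the clamp matters, a sketch with the flightlib.h constants: at 150 m/s the lift the wing could command at C_L_max corresponds to roughly 11 g, far beyond the +6 / -1.5 g pilot limits, so the clamp should engage almost immediately in this test.)

import numpy as np
m, g, rho, S, Cl_max = 4082.0, 9.81, 1.225, 21.65, 1.48
V = 150.0
q = 0.5 * rho * V**2
G_aero = Cl_max * q * S / (m * g)     # aerodynamic G available at this speed
print(f"aerodynamic limit ~{G_aero:.1f} g vs pilot limits +6.0 / -1.5 g")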
+ """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at high speed for maximum control authority + env.force_state( + player_pos=(0, 0, 2000), + player_vel=(150, 0, 0), + player_throttle=1.0, + ) + + g_min = float('inf') + for step in range(150): # 3 seconds of full push + action = np.array([[1.0, 1.0, 0.0, 0.0, 0.0]], dtype=np.float32) # Full forward + env.step(action) + + g = env.get_state()['g_force'] + g_min = min(g_min, g) + + if step % 25 == 0: + print(f" step {step:3d}: G = {g:+.2f} (min so far: {g_min:+.2f})") + + RESULTS['g_min'] = g_min + + # Should never go below -1.5G (with small tolerance) + G_LIMIT_NEG = -1.5 + status = "OK" if g_min >= G_LIMIT_NEG - 0.1 else "FAIL" + print(f"g_limit_neg: {g_min:+.2f} G (limit: {G_LIMIT_NEG}G) [{status}]") + assert g_min >= G_LIMIT_NEG - 0.1, f"G went below limit: {g_min} < {G_LIMIT_NEG}" + + +def test_g_limit_positive(): + """ + Full back stick - verify G never exceeds 6G (G_LIMIT_POS). + Physics should clamp acceleration to prevent exceeding this limit. + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + env.reset() + + # Start at high speed for maximum G capability + env.force_state( + player_pos=(0, 0, 2000), + player_vel=(180, 0, 0), # Very fast + player_throttle=1.0, + ) + + g_max = float('-inf') + for step in range(150): # 3 seconds of full pull + action = np.array([[1.0, -1.0, 0.0, 0.0, 0.0]], dtype=np.float32) # Full pull + env.step(action) + + g = env.get_state()['g_force'] + g_max = max(g_max, g) + + if step % 25 == 0: + print(f" step {step:3d}: G = {g:+.2f} (max so far: {g_max:+.2f})") + + RESULTS['g_max'] = g_max + + # Should never exceed 6G (with small tolerance) + G_LIMIT_POS = 6.0 + status = "OK" if g_max <= G_LIMIT_POS + 0.1 else "FAIL" + print(f"g_limit_pos: {g_max:+.2f} G (limit: {G_LIMIT_POS}G) [{status}]") + assert g_max <= G_LIMIT_POS + 0.1, f"G exceeded limit: {g_max} > {G_LIMIT_POS}" + + +def test_gentle_pitch_control(): + """ + Test that small elevator inputs produce proportional, gentle pitch changes. + + This is CRITICAL for fine aim adjustments - the agent must be able to make + precise 2.5° corrections, not just bang-bang full deflection. + + Tests: + 1. -0.1 elevator: should give small pitch rate (~5°/s or less) + 2. -0.25 elevator: should give larger pitch rate (~10-15°/s) + 3. Verify linear relationship (not bang-bang) + 4. 
Calculate time to make 2.5° adjustment + """ + env = Dogfight(num_envs=1, render_mode=RENDER_MODE, render_fps=RENDER_FPS) + + elevator_values = [-0.05, -0.1, -0.15, -0.2, -0.25, -0.3] + pitch_rates = [] + + print(" Testing gentle elevator inputs (negative = pull back = nose UP):") + + for elev in elevator_values: + env.reset() + + # Start level at cruise speed + env.force_state( + player_pos=(0, 0, 1500), + player_vel=(120, 0, 0), # Cruise speed + player_ori=(1.0, 0.0, 0.0, 0.0), # Wings level + player_throttle=0.7, + ) + + # Record initial pitch + state = env.get_state() + fwd_x_start, fwd_z_start = state['fwd_x'], state['fwd_z'] + pitch_start = np.arctan2(fwd_z_start, fwd_x_start) + + # Apply constant elevator for 1 second (50 steps) + for step in range(50): + action = np.array([[0.4, elev, 0.0, 0.0, 0.0]], dtype=np.float32) + env.step(action) + + # Measure final pitch + state = env.get_state() + fwd_x_end, fwd_z_end = state['fwd_x'], state['fwd_z'] + pitch_end = np.arctan2(fwd_z_end, fwd_x_end) + + pitch_change_deg = np.degrees(pitch_end - pitch_start) + pitch_rate = pitch_change_deg / 1.0 # degrees per second + pitch_rates.append(pitch_rate) + + print(f" elevator={elev:+.2f}: pitch_rate={pitch_rate:+.1f}°/s, pitch_change={pitch_change_deg:+.1f}°") + + # Check for proportional response + # Ratio of pitch rates should roughly match ratio of elevator inputs + rate_at_01 = pitch_rates[1] # -0.1 elevator + rate_at_025 = pitch_rates[4] # -0.25 elevator + + # Store results + RESULTS['pitch_rate_01'] = rate_at_01 + RESULTS['pitch_rate_025'] = rate_at_025 + + # Calculate time to make 2.5° adjustment at -0.1 elevator + if abs(rate_at_01) > 0.1: + time_for_25deg = 2.5 / abs(rate_at_01) + else: + time_for_25deg = float('inf') + + RESULTS['time_for_25deg'] = time_for_25deg + + # Check proportionality: -0.25 should give ~2.5x the rate of -0.1 + expected_ratio = 2.5 + actual_ratio = rate_at_025 / rate_at_01 if abs(rate_at_01) > 0.1 else 0 + + # Verify reasonable pitch rates (not too fast, not too slow) + # -0.1 elevator should give roughly 3-8°/s (gentle but noticeable) + gentle_ok = 2.0 < abs(rate_at_01) < 15.0 + proportional_ok = 1.5 < actual_ratio < 4.0 # Some non-linearity is OK + can_aim = time_for_25deg < 2.0 # Should be able to make 2.5° adjustment in <2 seconds + + all_ok = gentle_ok and proportional_ok and can_aim + status = "OK" if all_ok else "CHECK" + + print(f" Results:") + print(f" -0.1 elevator gives {rate_at_01:+.1f}°/s (want 3-8°/s) [{gentle_ok and 'OK' or 'CHECK'}]") + print(f" -0.25/-0.1 ratio = {actual_ratio:.2f} (want ~2.5, linear) [{proportional_ok and 'OK' or 'CHECK'}]") + print(f" Time to adjust 2.5° at -0.1: {time_for_25deg:.2f}s (want <2s) [{can_aim and 'OK' or 'CHECK'}]") + print(f"gentle_pitch: rate@-0.1={rate_at_01:+.1f}°/s, 2.5°_time={time_for_25deg:.2f}s [{status}]") + + if not gentle_ok: + if abs(rate_at_01) < 2.0: + print(f" WARNING: Pitch too sluggish! Agent can't make timely aim corrections.") + else: + print(f" WARNING: Pitch too sensitive! 
+def print_summary():
+    """Print summary table."""
+    print("\n" + "=" * 60)
+    print("SUMMARY")
+    print("=" * 60)
+
+    def fmt(key):
+        v = RESULTS.get(key)
+        if v is None:
+            return 'N/A'
+        if isinstance(v, float):
+            return f"{v:.1f}"
+        return str(v)
+
+    print(f"| Metric         | Result | P-51D Target |")
+    print(f"|----------------|--------|--------------|")
+    print(f"| max_speed      | {fmt('max_speed'):>6} | {P51D_MAX_SPEED:.0f} m/s |")
+    print(f"| cruise_speed   | {fmt('cruise_speed'):>6} | - |")
+    print(f"| stall_speed    | {fmt('stall_speed'):>6} | {P51D_STALL_SPEED:.0f} m/s |")
+    print(f"| climb_rate     | {fmt('climb_rate'):>6} | {P51D_CLIMB_RATE:.0f} m/s |")
+    print(f"| glide_L/D      | {fmt('glide_LD'):>6} | 14.6 |")
+    print(f"| turn_rate      | {fmt('turn_rate'):>6} | 5.6°/s (45° bank) |")
+    print(f"| rudder_yaw     | {fmt('rudder_yaw_rate'):>6} | 5-15°/s (wings lvl) |")
+    print(f"| pitch_dir      | {fmt('pitch_direction'):>6} | DOWN (+elev) |")
+    print(f"| roll_works     | {fmt('roll_works'):>6} | YES |")
+
+
+if __name__ == "__main__":
+    # Map test names to functions
+    TESTS = {
+        'max_speed': test_max_speed,
+        'acceleration': test_acceleration,
+        'deceleration': test_deceleration,
+        'cruise_speed': test_cruise_speed,
+        'stall_speed': test_stall_speed,
+        'climb_rate': test_climb_rate,
+        'glide_ratio': test_glide_ratio,
+        'sustained_turn': test_sustained_turn,
+        'turn_60': test_turn_60,
+        'pitch_direction': test_pitch_direction,
+        'roll_direction': test_roll_direction,
+        'rudder_only_turn': test_rudder_only_turn,
+        'knife_edge_pull': test_knife_edge_pull,
+        'knife_edge_flight': test_knife_edge_flight,
+        'mode_weights': test_mode_weights,
+        # G-force tests
+        'g_level_flight': test_g_level_flight,
+        'g_push_forward': test_g_push_forward,
+        'g_pull_back': test_g_pull_back,
+        'g_limit_negative': test_g_limit_negative,
+        'g_limit_positive': test_g_limit_positive,
+        # Fine control tests
+        'gentle_pitch': test_gentle_pitch_control,
+    }
+
+    print("P-51D Physics Validation Tests")
+    print("=" * 60)
+
+    if ARGS.test:
+        # Run single test
+        if ARGS.test in TESTS:
+            print(f"Running single test: {ARGS.test}")
+            if RENDER_MODE:
+                print("Rendering enabled - press ESC to exit")
+            print("=" * 60)
+            TESTS[ARGS.test]()
+        else:
+            print(f"Unknown test: {ARGS.test}")
+            print(f"Available tests: {', '.join(TESTS.keys())}")
+    else:
+        # Run all tests
+        print("Using force_state() for precise initial conditions")
+        if RENDER_MODE:
+            print("Rendering enabled - press ESC to exit")
+        print("=" * 60)
+        for test_func in TESTS.values():
+            test_func()
+        print_summary()
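# --- Editorial sketch (illustration only, not part of this patch) ----------
# The __main__ block above references ARGS, RENDER_MODE and RENDER_FPS, which
# are defined in the unshown head of the test file. A minimal argparse setup
# consistent with those references might look like this; the flag names and
# the "human" render-mode string are assumptions, not the file's actual
# interface.
import argparse

_parser = argparse.ArgumentParser(description="P-51D physics validation tests")
_parser.add_argument("--test", default=None, help="run a single named test")
_parser.add_argument("--render", action="store_true", help="enable rendering")
_parser.add_argument("--fps", type=int, default=50, help="render FPS")
ARGS = _parser.parse_args()

RENDER_MODE = "human" if ARGS.render else None
RENDER_FPS = ARGS.fps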
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py
index 93df76506..70cd2d988 100644
--- a/pufferlib/ocean/environment.py
+++ b/pufferlib/ocean/environment.py
@@ -122,6 +122,7 @@ def make_multiagent(buf=None, **kwargs):
     'blastar': 'Blastar',
     'convert': 'Convert',
     'convert_circle': 'ConvertCircle',
+    'dogfight': 'Dogfight',
     'pong': 'Pong',
     'freeway': 'Freeway',
     'enduro': 'Enduro',
diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py
index 8cf4ffe7d..5d9d0e4d9 100644
--- a/pufferlib/ocean/torch.py
+++ b/pufferlib/ocean/torch.py
@@ -299,7 +299,7 @@ def decode_actions(self, flat_hidden, state=None):
         value = self.value_fn(flat_hidden)
         if self.is_continuous:
             mean = self.decoder_mean(flat_hidden)
-            logstd = self.decoder_logstd.expand_as(mean)
+            logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2)
             std = torch.exp(logstd)
             probs = torch.distributions.Normal(mean, std)
             batch = flat_hidden.shape[0]
@@ -433,7 +433,7 @@ def decode_actions(self, flat_hidden):
         value = self.value_fn(flat_hidden)
         if self.is_continuous:
             mean = self.decoder_mean(flat_hidden)
-            logstd = self.decoder_logstd.expand_as(mean)
+            logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2)
             std = torch.exp(logstd)
             probs = torch.distributions.Normal(mean, std)
             batch = flat_hidden.shape[0]
@@ -893,7 +893,7 @@ def decode_actions(self, hidden):
             logits = self.decoder(hidden).split(self.action_nvec, dim=1)
         elif self.is_continuous:
             mean = self.decoder_mean(hidden)
-            logstd = self.decoder_logstd.expand_as(mean)
+            logstd = self.decoder_logstd.expand_as(mean).clamp(min=-20, max=2)
             std = torch.exp(logstd)
             logits = torch.distributions.Normal(mean, std)
         else:
diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py
index 3972e722f..6abf8b439 100644
--- a/pufferlib/pufferl.py
+++ b/pufferlib/pufferl.py
@@ -939,6 +939,14 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None, should_sto
         logger = WandbLogger(args)
 
     train_config = { **args['train'], 'env': env_name }
+
+    # Print training hyperparameters for debugging
+    print(f"=== TRAIN CONFIG ===")
+    print(f"  clip_coef={train_config.get('clip_coef', 'N/A'):.4f}, gae_lambda={train_config.get('gae_lambda', 'N/A'):.4f}")
+    print(f"  learning_rate={train_config.get('learning_rate', 'N/A'):.6f}, max_grad_norm={train_config.get('max_grad_norm', 'N/A'):.4f}")
+    print(f"  gamma={train_config.get('gamma', 'N/A'):.6f}, ent_coef={train_config.get('ent_coef', 'N/A'):.6f}")
+    print(f"  adam_eps={train_config.get('adam_eps', 'N/A'):.2e}")
+
     pufferl = PuffeRL(train_config, vecenv, policy, logger)
 
     all_logs = []
diff --git a/setup.py b/setup.py
index 552cb00e8..9fd3bf9fc 100644
--- a/setup.py
+++ b/setup.py
@@ -189,14 +189,15 @@ def run(self):
 # Find C extensions
 c_extensions = []
 if not NO_OCEAN:
-    c_extension_paths = glob.glob('pufferlib/ocean/**/binding.c', recursive=True)
+    #c_extension_paths = glob.glob('pufferlib/ocean/**/binding.c', recursive=True)
+    c_extension_paths = ['pufferlib/ocean/dogfight/binding.c']
     c_extensions = [
         Extension(
             path.rstrip('.c').replace('/', '.'),
             sources=[path],
             **extension_kwargs,
         )
-        for path in c_extension_paths if 'matsci' not in path
+        for path in c_extension_paths# if 'matsci' not in path
     ]
 
     c_extension_paths = [os.path.join(*path.split('/')[:-1]) for path in c_extension_paths]
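# --- Editorial sketch (illustration only, not part of this patch) ----------
# The three torch.py hunks above clamp the learned log-std to [-20, 2] before
# exponentiating, so the std of the continuous action distribution stays
# within roughly [2e-9, 7.4]. One likely motivation: an unconstrained logstd
# parameter can drift far negative (a near-deterministic policy with extreme
# log-prob ratios) or far positive (essentially random actions). Minimal
# standalone demonstration of the bound; the tensor shapes are arbitrary.
import torch

mean = torch.zeros(4, 5)                                      # batch of 4, 5 continuous actions
decoder_logstd = torch.nn.Parameter(torch.full((5,), -45.0))  # drifted far negative

raw_std = torch.exp(decoder_logstd.expand_as(mean))           # ~2.9e-20 without the clamp
logstd = decoder_logstd.expand_as(mean).clamp(min=-20, max=2)
std = torch.exp(logstd)                                       # bounded below by exp(-20) ~ 2.1e-9
dist = torch.distributions.Normal(mean, std)
print(raw_std.max().item(), std.min().item())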