experiment#


class A2CExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_a2c_params(params: A2CParams) → Self[source]#
class DDPGExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_ddpg_params(params: DDPGParams) → Self[source]#
class DQNExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_dqn_params(params: DQNParams) → Self[source]#
with_model_factory(module_factory: IntermediateModuleFactory) → Self[source]#
Parameters:

module_factory – factory for a module which maps environment observations to a vector of Q-values (one for each action)

Returns:

the builder

with_model_factory_default(hidden_sizes: Sequence[int], hidden_activation: type[Module] = ReLU) → Self[source]#

Allows configuring the default factory for the Q-function model, which maps environment observations to a vector of Q-values (one for each action). The default model is a multi-layer perceptron.

Parameters:
  • hidden_sizes – the sequence of dimensions used for hidden layers

  • hidden_activation – the activation function to use for hidden layers (not used for the output layer)

Returns:

the builder
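
For illustration, a minimal sketch of combining these builder methods; my_env_factory stands for any EnvFactory implementation you supply, and the import paths and default-constructibility of DQNParams are assumptions to verify against the library:

    from tianshou.highlevel.experiment import DQNExperimentBuilder, ExperimentConfig  # paths assumed
    from tianshou.highlevel.params.policy_params import DQNParams  # path assumed

    experiment = (
        DQNExperimentBuilder(
            my_env_factory,  # your EnvFactory implementation, defined elsewhere
            experiment_config=ExperimentConfig(seed=42),
        )
        .with_dqn_params(DQNParams())  # assuming the default parameters are acceptable
        .with_model_factory_default(hidden_sizes=(128, 128))  # MLP mapping observations to per-action Q-values
        .build()
    )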

class DiscreteSACExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_sac_params(params: DiscreteSACParams) → Self[source]#
class Experiment(config: ExperimentConfig, env_factory: EnvFactory, agent_factory: AgentFactory, sampling_config: SamplingConfig, logger_factory: LoggerFactory | None = None)[source]#

Represents a reinforcement learning experiment.

An experiment is composed only of configuration and factory objects, which themselves should be designed to contain only configuration. Experiments can therefore be easily stored/pickled and restored later.

EXPERIMENT_PICKLE_FILENAME = 'experiment.pkl'#
LOG_FILENAME = 'log.txt'#
classmethod from_directory(directory: str, restore_policy: bool = True) → Experiment[source]#

Restores an experiment from a previously stored pickle.

Parameters:
  • directory – persistence directory of a previous run, in which a pickled experiment is found

  • restore_policy – whether the experiment shall be configured to restore the policy that was persisted in the given directory
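
For example (the directory path is illustrative and assumes that a prior run persisted an experiment pickle there):

    from tianshou.highlevel.experiment import Experiment  # path assumed

    # Restore the pickled experiment and configure it to reload the persisted policy
    experiment = Experiment.from_directory("log/my_experiment", restore_policy=True)
    result = experiment.run()  # run again, e.g. to watch the restored policy (behaviour depends on the ExperimentConfig)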

run(experiment_name: str | None = None, logger_run_id: str | None = None) → ExperimentResult[source]#

Run the experiment and return the results.

Parameters:
  • experiment_name – the experiment name, which corresponds to the directory (within the logging directory) where all results associated with the experiment will be saved. The name may contain path separators (i.e. os.path.sep, as used by os.path.join), in which case a nested directory structure will be created. If None, use a name containing the current date and time.

  • logger_run_id – Run identifier to use for logger initialization/resumption (applies when using wandb, in particular).

Returns:

the experiment result

save(directory: str) → None[source]#
class ExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
build() → Experiment[source]#

Creates the experiment based on the options specified via this builder.

Returns:

the experiment

with_epoch_stop_callback(callback: EpochStopCallback) → Self[source]#

Allows defining a callback that decides whether training should stop early.

The callback receives the undiscounted returns of the testing result.

Parameters:

callback – the callback

Returns:

the builder
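
A hedged sketch of an early-stopping callback; the base-class import path and the should_stop method name/signature are assumptions about the EpochStopCallback interface and should be checked against the actual class definition:

    from tianshou.highlevel.trainer import EpochStopCallback  # path assumed

    class StopOnReturn(EpochStopCallback):
        def __init__(self, threshold: float):
            self.threshold = threshold

        def should_stop(self, mean_rewards: float, context) -> bool:  # signature assumed
            # mean_rewards: undiscounted returns from the test phase
            return mean_rewards >= self.threshold

    # builder: an ExperimentBuilder instance created elsewhere
    builder = builder.with_epoch_stop_callback(StopOnReturn(195.0))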

with_epoch_test_callback(callback: EpochTestCallback) → Self[source]#

Allows defining a callback function that is called at the beginning of testing in each epoch.

Parameters:

callback – the callback

Returns:

the builder

with_epoch_train_callback(callback: EpochTrainCallback) → Self[source]#

Allows defining a callback function that is called at the beginning of every epoch during training.

Parameters:

callback – the callback

Returns:

the builder

with_logger_factory(logger_factory: LoggerFactory) → Self[source]#

Allows customizing the logger factory to use.

If this method is not called, the default logger factory LoggerFactoryDefault will be used.

Parameters:

logger_factory – the factory to use

Returns:

the builder
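
For example, an explicitly constructed default factory can be supplied; the import path and no-argument construction of LoggerFactoryDefault shown below are assumptions:

    from tianshou.highlevel.logger import LoggerFactoryDefault  # path assumed

    # builder: an ExperimentBuilder instance created elsewhere
    builder = builder.with_logger_factory(LoggerFactoryDefault())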

with_optim_factory(optim_factory: OptimizerFactory) → Self[source]#

Allows customizing the gradient-based optimizer to use.

By default, OptimizerFactoryAdam will be used with default parameters.

Parameters:

optim_factory – the optimizer factory

Returns:

the builder

with_optim_factory_default(betas: tuple[float, float] = (0.9, 0.999), eps: float = 1e-08, weight_decay: float = 0) → Self[source]#

Configures the use of the default optimizer, Adam, with the given parameters.

Parameters:
  • betas – coefficients used for computing running averages of gradient and its square

  • eps – term added to the denominator to improve numerical stability

  • weight_decay – weight decay (L2 penalty)

Returns:

the builder
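
For example (values are illustrative):

    # builder: an ExperimentBuilder instance created elsewhere
    builder = builder.with_optim_factory_default(
        betas=(0.9, 0.999),   # running-average coefficients for the gradient and its square
        eps=1e-8,             # numerical-stability term added to the denominator
        weight_decay=1e-4,    # L2 penalty
    )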

with_policy_wrapper_factory(policy_wrapper_factory: PolicyWrapperFactory) → Self[source]#

Allows defining a wrapper around the policy being created, extending the original policy.

Parameters:

policy_wrapper_factory – the factory for the wrapper

Returns:

the builder

class ExperimentConfig(seed: int = 42, device: str | device = 'cpu', policy_restore_directory: str | None = None, train: bool = True, watch: bool = True, watch_num_episodes: int = 10, watch_render: float = 0.0, persistence_base_dir: str = 'log', persistence_enabled: bool = True, log_file_enabled: bool = True, policy_persistence_mode: Mode = Mode.POLICY)[source]#

Generic configuration for setting up the experiment; not specific to RL or training.

device: str | device = 'cpu'#

The torch device to use

log_file_enabled: bool = True#

Whether to write to a log file; has no effect if persistence_enabled is False. Disable this if you have externally configured log file generation.

persistence_base_dir: str = 'log'#

Base directory in which experiment data is to be stored. Every experiment run will create a subdirectory in this directory based on the run’s experiment name

persistence_enabled: bool = True#

Whether persistence is enabled, allowing files to be stored

policy_persistence_mode: Mode = 'policy'#

Controls the way in which the policy is persisted

policy_restore_directory: str | None = None#

Directory from which to load the policy neural network parameters (persistence directory of a previous run)

seed: int = 42#

The random seed with which to initialize random number generators.

train: bool = True#

Whether to perform training

watch: bool = True#

Whether to watch agent performance (after training)

watch_num_episodes: int = 10#

Number of episodes for which to watch performance (if watch is enabled)

watch_render: float = 0.0#

Milliseconds between rendered frames when watching agent performance (if watch is enabled)
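
A sketch of a typical configuration using the fields above (values are illustrative):

    config = ExperimentConfig(
        seed=42,
        device="cuda",               # torch device to use
        persistence_base_dir="log",  # each run creates a subdirectory named after the experiment
        watch=True,                  # watch agent performance after training
        watch_num_episodes=5,
    )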

class ExperimentResult(world: World, trainer_result: InfoStats | None)[source]#

Contains the results of an experiment.

trainer_result: InfoStats | None#

dataclass of results as returned by the trainer (if any)

world: World#

contains all the essential instances of the experiment
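
For example, after a run the result can be inspected as follows (experiment is an Experiment instance created elsewhere):

    result = experiment.run()
    if result.trainer_result is not None:
        print(result.trainer_result)  # InfoStats returned by the trainer
    world = result.world              # World instance holding the experiment's essential objects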

class IQNExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_iqn_params(params: IQNParams) → Self[source]#
with_preprocess_network_factory(module_factory: IntermediateModuleFactory) → Self[source]#
class NPGExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_npg_params(params: NPGParams) → Self[source]#
class PGExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_pg_params(params: PGParams) → Self[source]#
class PPOExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_ppo_params(params: PPOParams) → Self[source]#
class REDQExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_redq_params(params: REDQParams) → Self[source]#
class SACExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_sac_params(params: SACParams) → Self[source]#
class TD3ExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_td3_params(params: TD3Params) → Self[source]#
class TRPOExperimentBuilder(env_factory: EnvFactory, experiment_config: ExperimentConfig | None = None, sampling_config: SamplingConfig | None = None)[source]#
with_trpo_params(params: TRPOParams) → Self[source]#
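
Putting the pieces together, an algorithm-specific builder such as PPOExperimentBuilder might be used as sketched below; my_env_factory stands for any EnvFactory implementation you provide, and the import paths and default-constructibility of PPOParams and SamplingConfig are assumptions to verify:

    from tianshou.highlevel.config import SamplingConfig          # path assumed
    from tianshou.highlevel.experiment import (                   # paths assumed
        ExperimentConfig,
        PPOExperimentBuilder,
    )
    from tianshou.highlevel.params.policy_params import PPOParams  # path assumed

    experiment = (
        PPOExperimentBuilder(
            my_env_factory,                               # your EnvFactory implementation
            experiment_config=ExperimentConfig(seed=0),
            sampling_config=SamplingConfig(),             # assuming defaults are acceptable
        )
        .with_ppo_params(PPOParams())                     # assuming defaults are acceptable
        .build()
    )
    result = experiment.run(experiment_name="ppo-example")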