stats#


class EpochStats(*, epoch: int, train_collect_stat: CollectStatsBase, test_collect_stat: CollectStats | None, training_stat: TrainingStats, info_stat: InfoStats)[source]#

A data structure for storing epoch statistics.

epoch: int#

The current epoch.

info_stat: InfoStats#

Information about the learning process (see InfoStats).

test_collect_stat: CollectStats | None#

The statistics of the last call to the test collector.

train_collect_stat: CollectStatsBase#

The statistics of the last call to the training collector.

training_stat: TrainingStats#

The statistics of the last model update step.

class InfoStats(*, gradient_step: int, best_reward: float, best_reward_std: float, train_step: int, train_episode: int, test_step: int, test_episode: int, timing: TimingStats)[source]#

A data structure for storing information about the learning process.

best_reward: float#

The best reward over the test results.

best_reward_std: float#

Standard deviation of the best reward over the test results.

gradient_step: int#

The total number of gradient steps.

test_episode: int#

The total number of episodes collected by the test collector.

test_step: int#

The total number of steps collected by the test collector.

timing: TimingStats#

The timing statistics.

train_episode: int#

The total number of episodes collected by the training collector.

train_step: int#

The total number of steps collected by the training collector.

class SequenceSummaryStats(*, mean: float, std: float, max: float, min: float)[source]#

A data structure for storing the statistics of a sequence.

classmethod from_sequence(sequence: Sequence[float | int] | ndarray) → SequenceSummaryStats[source]#

max: float#

mean: float#

min: float#

std: float#

class TimingStats(*, total_time: float = 0.0, train_time: float = 0.0, train_time_collect: float = 0.0, train_time_update: float = 0.0, test_time: float = 0.0, update_speed: float = 0.0)[source]#

A data structure for storing timing statistics.

test_time: float = 0.0#

The total time elapsed for testing models.

total_time: float = 0.0#

The total time elapsed.

train_time: float = 0.0#

The total time elapsed for training (collecting samples plus model update).

train_time_collect: float = 0.0#

The total time elapsed for collecting training transitions.

train_time_update: float = 0.0#

The total time elapsed for updating models.

update_speed: float = 0.0#

The update speed, measured in environment steps (env_step) per second.