nano_gpt.datasets
Datasets for training and evaluating the model.
"""Datasets for training and evaluating the model."""

from . import finewebedu, tinyshakespeare

# Public names of this package.
# NOTE(review): "hellaswag" is exported here but not imported in this module —
# presumably it is a sibling submodule picked up by star-import; confirm.
__all__ = [
    "TRAIN_DATASETS",
    "finewebedu",
    "tinyshakespeare",
    "hellaswag",
]


# Training dataset descriptors, one taken from each imported dataset submodule.
TRAIN_DATASETS_LIST = [
    finewebedu.DATASET,
    tinyshakespeare.DATASET,
]

# Lookup table keyed by each descriptor's ``name`` attribute.
TRAIN_DATASETS = {entry.name: entry for entry in TRAIN_DATASETS_LIST}
TRAIN_DATASETS =
{'finewebedu': TrainDataset(name='finewebedu', load_fn=<function load_dataset>, total_tokens=10000000000, tokens_per_shard=100000000), 'tinyshakespeare': TrainDataset(name='tinyshakespeare', load_fn=<function load_dataset>, total_tokens=301967, tokens_per_shard=400000)}