def prepare_dataset(batch):
    """Add model inputs and target token ids to a single example.

    Relies on two module-level callables defined outside this function:
    ``feature_extractor`` and ``tokenizer``.

    Args:
        batch: Mapping-like example. It is read at ``batch["audio"]``
            (which is itself indexed with ``"array"`` and
            ``"sampling_rate"``) and at ``batch["translation"]``.

    Returns:
        The same ``batch`` object, updated in place with two new entries:
        ``"input_features"`` and ``"labels"``.
    """
    audio = batch["audio"]

    extracted = feature_extractor(
        audio["array"],
        sampling_rate=audio["sampling_rate"],
    )
    # Only the first entry of ``input_features`` is kept -- presumably the
    # extractor returns a batch of size one for a single example (TODO confirm).
    batch["input_features"] = extracted.input_features[0]

    # The tokenizer output's ``input_ids`` are stored as the training labels.
    batch["labels"] = tokenizer(batch["translation"]).input_ids
    return batch
# Apply prepare_dataset through dataset.map and rebind `dataset` to the result.
# `remove_columns` receives the column names stored under the "train" key;
# presumably this drops the raw columns once the new features exist
# (verify against the `map` implementation in use).
dataset = dataset.map(
    prepare_dataset,
    remove_columns=dataset.column_names["train"],
    num_proc=None,  # explicitly passes None for the worker-count argument
)
# NOTE(review): this definition is token-for-token identical to the
# ``prepare_dataset`` defined earlier in this file and rebinds the same name;
# one of the two copies is presumably redundant -- confirm before removing.
def prepare_dataset(batch):
    """Populate ``batch`` with extracted audio features and label ids.

    Uses the module-level ``feature_extractor`` and ``tokenizer`` callables,
    which are defined outside this function.

    Args:
        batch: Mapping-like example providing ``"audio"`` (indexed here with
            ``"array"`` and ``"sampling_rate"``) and ``"translation"``.

    Returns:
        ``batch`` itself, mutated to contain ``"input_features"`` and
        ``"labels"``.
    """
    audio = batch["audio"]
    waveform = audio["array"]
    rate = audio["sampling_rate"]

    # Keep element 0 of the extractor's ``input_features`` -- presumably a
    # one-item batch produced for this single example (TODO confirm).
    batch["input_features"] = feature_extractor(
        waveform, sampling_rate=rate
    ).input_features[0]

    # Label ids come from the ``input_ids`` attribute of the tokenizer output.
    batch["labels"] = tokenizer(batch["translation"]).input_ids
    return batch
# Map prepare_dataset over `dataset` and store the result back in `dataset`.
# NOTE(review): an identical `dataset.map(...)` call appears earlier in this
# file. If that call already rebound `dataset` and removed the original
# columns, the "audio" / "translation" fields read by prepare_dataset may no
# longer be present here -- confirm this second pass is intended.
dataset = dataset.map(
    prepare_dataset,
    remove_columns=dataset.column_names["train"],
    num_proc=None,  # explicitly passes None for the worker-count argument
)