Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions test/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,41 @@ def __len__(self):
return self.size


class SeedDataset(Dataset):
    """Dataset whose every item is the PyTorch seed of the calling process.

    Each lookup ignores the index and reports ``torch.initial_seed()``, so
    the values collected from a loader reveal how the processes that
    served them were seeded.
    """

    def __init__(self, size):
        # Number of items this dataset reports through ``len()``.
        self.size = size

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        # ``idx`` is deliberately unused: the item is the current seed.
        current_seed = torch.initial_seed()
        return current_seed


# Inspired by https://stackoverflow.com/a/26703365
# This ensures that each worker processes at least one sample
class SynchronizedSeedDataset(Dataset):
    """Dataset that blocks item fetches until ``num_workers`` fetches started.

    ``__getitem__`` acts as a barrier built from a shared counter and a
    semaphore: no call returns before ``num_workers`` calls have been
    made. A caller blocked in the barrier cannot start another fetch, so
    with ``num_workers`` loader processes each one must serve an item.
    Every item is the ``torch.initial_seed()`` of the process serving it.

    Args:
        size: number of items reported by ``len()``; must be at least
            ``num_workers`` or the barrier could never open.
        num_workers: number of ``__getitem__`` calls required to open
            the barrier.
    """

    def __init__(self, size, num_workers):
        assert size >= num_workers
        # Shared across processes; ``Value`` carries its own lock, which
        # ``__getitem__`` takes explicitly around the read-modify-write.
        self.count = multiprocessing.Value('i', 0)
        # Starts closed (0 permits) and is opened by the last arrival.
        self.barrier = multiprocessing.Semaphore(0)
        self.num_workers = num_workers
        self.size = size

    def __getitem__(self, idx):
        # ``+=`` on a shared Value is a separate load and store, so the
        # increment and the comparison must happen under the Value's lock.
        # Otherwise concurrent workers can lose an update (the barrier
        # never opens) or observe the target count twice.
        with self.count.get_lock():
            self.count.value += 1
            if self.count.value == self.num_workers:
                self.barrier.release()
        # Turnstile: take the permit, then hand it to the next waiter.
        self.barrier.acquire()
        self.barrier.release()
        return torch.initial_seed()

    def __len__(self):
        return self.size


class TestDataLoader(TestCase):

def setUp(self):
Expand Down Expand Up @@ -220,6 +255,27 @@ def _test_timeout():
finally:
p.terminate()

def test_worker_seed(self):
    """Check that every worker process reports a distinct PyTorch seed."""
    num_workers = 6
    # One item per worker; the dataset's barrier holds every fetch until
    # ``num_workers`` fetches have started.
    dataset = SynchronizedSeedDataset(num_workers, num_workers)
    dataloader = DataLoader(dataset, batch_size=1, num_workers=num_workers)
    # batch_size=1, so the first element of each batch is that item's seed.
    seeds = {batch[0] for batch in dataloader}
    self.assertEqual(len(seeds), num_workers)

def test_worker_init_fn(self):
    """Check that ``worker_init_fn`` runs in each worker before loading."""
    expected_seed = 12345

    # Custom init function: reseed the worker with a fixed, known seed.
    def init_fn(worker_id):
        torch.manual_seed(expected_seed)

    dataset = SeedDataset(4)
    dataloader = DataLoader(dataset, batch_size=2, num_workers=2,
                            worker_init_fn=init_fn)
    for batch in dataloader:
        # Both items of every batch must carry the seed set by init_fn.
        for position in (0, 1):
            self.assertEqual(expected_seed, batch[position])

This comment was marked as off-topic.

This comment was marked as off-topic.

This comment was marked as off-topic.

This comment was marked as off-topic.

This comment was marked as off-topic.


def test_shuffle(self):
    """Run the shared shuffle check on a loader built with ``shuffle=True``."""
    shuffled_loader = DataLoader(self.dataset, shuffle=True)
    self._test_shuffle(shuffled_loader)

Expand Down
40 changes: 31 additions & 9 deletions torch/utils/data/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
_remove_worker_pids, _error_if_any_worker_fails
from .sampler import SequentialSampler, RandomSampler, BatchSampler
import signal
import functools
import collections
import re
import sys
Expand All @@ -30,7 +31,7 @@ def __init__(self, exc_info):
self.exc_msg = "".join(traceback.format_exception(*exc_info))


def _worker_loop(dataset, index_queue, data_queue, collate_fn):
def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id):
global _use_shared_memory
_use_shared_memory = True

Expand All @@ -41,6 +42,11 @@ def _worker_loop(dataset, index_queue, data_queue, collate_fn):
_set_worker_signal_handlers()

torch.set_num_threads(1)
torch.manual_seed(seed)

if init_fn is not None:
init_fn(worker_id)

while True:
r = index_queue.get()
if r is None:
Expand Down Expand Up @@ -183,6 +189,7 @@ def __init__(self, loader):
self.sample_iter = iter(self.batch_sampler)

if self.num_workers > 0:
self.worker_init_fn = loader.worker_init_fn
self.index_queue = multiprocessing.SimpleQueue()
self.worker_result_queue = multiprocessing.SimpleQueue()
self.batches_outstanding = 0
Expand All @@ -192,15 +199,13 @@ def __init__(self, loader):
self.rcvd_idx = 0
self.reorder_dict = {}

base_seed = torch.LongTensor(1).random_()[0]

This comment was marked as off-topic.

self.workers = [
multiprocessing.Process(
target=_worker_loop,
args=(self.dataset, self.index_queue, self.worker_result_queue, self.collate_fn))
for _ in range(self.num_workers)]

for w in self.workers:
w.daemon = True # ensure that the worker exits on process exit
w.start()
args=(self.dataset, self.index_queue, self.worker_result_queue, self.collate_fn,
base_seed + i, self.worker_init_fn, i))
for i in range(self.num_workers)]

if self.pin_memory or self.timeout > 0:
self.data_queue = queue.Queue()
Expand All @@ -212,6 +217,10 @@ def __init__(self, loader):
else:
self.data_queue = self.worker_result_queue

for w in self.workers:
w.daemon = True # ensure that the worker exits on process exit
w.start()

_update_worker_pids(id(self), tuple(w.pid for w in self.workers))
_set_SIGCHLD_handler()
self.worker_pids_set = True
Expand Down Expand Up @@ -326,7 +335,7 @@ class DataLoader(object):
indices at a time. Mutually exclusive with batch_size, shuffle,
sampler, and drop_last.
num_workers (int, optional): how many subprocesses to use for data
loading. 0 means that the data will be loaded in the main process
loading. 0 means that the data will be loaded in the main process.
(default: 0)
collate_fn (callable, optional): merges a list of samples to form a mini-batch.
pin_memory (bool, optional): If ``True``, the data loader will copy tensors
Expand All @@ -337,18 +346,31 @@ class DataLoader(object):
will be smaller. (default: False)
timeout (numeric, optional): if positive, the timeout value for collecting a batch
from workers. Should always be non-negative. (default: 0)
worker_init_fn (callable, optional): If not None, this will be called on each
worker subprocess with the worker id as input, after seeding and before data
loading. (default: None)

.. note:: By default, each worker will have its PyTorch seed set to
``base_seed + worker_id``, where ``base_seed`` is a long generated
by the main process using its RNG. You may use ``torch.initial_seed()`` in
:attr:`worker_init_fn` to access this per-worker seed, which can be used to set
other seeds (e.g. NumPy) before data loading.

.. warning:: If the ``spawn`` start method is used, :attr:`worker_init_fn` cannot be an
unpicklable object, e.g., a lambda function.

This comment was marked as off-topic.

"""

def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False,
timeout=0):
timeout=0, worker_init_fn=None):
self.dataset = dataset
self.batch_size = batch_size
self.num_workers = num_workers
self.collate_fn = collate_fn
self.pin_memory = pin_memory
self.drop_last = drop_last
self.timeout = timeout
self.worker_init_fn = worker_init_fn

if timeout < 0:
raise ValueError('timeout option should be non-negative')
Expand Down