From c5eb488b463f57c01cbf7e6637a8bd7177c7b63d Mon Sep 17 00:00:00 2001 From: SigureMo Date: Thu, 27 Jul 2023 03:06:54 +0800 Subject: [PATCH 1/7] [xdoctest] reformat example code with google style in `paddle/io` --- python/paddle/io/dataloader/batch_sampler.py | 155 ++++--- python/paddle/io/dataloader/dataset.py | 443 ++++++++++--------- python/paddle/io/dataloader/sampler.py | 153 ++++--- python/paddle/io/dataloader/worker.py | 91 ++-- python/paddle/io/reader.py | 122 +++-- 5 files changed, 505 insertions(+), 459 deletions(-) diff --git a/python/paddle/io/dataloader/batch_sampler.py b/python/paddle/io/dataloader/batch_sampler.py index 190e9240900f8..b8349fd6d92ab 100644 --- a/python/paddle/io/dataloader/batch_sampler.py +++ b/python/paddle/io/dataloader/batch_sampler.py @@ -58,40 +58,37 @@ class BatchSampler(Sampler): .. code-block:: python - from paddle.io import RandomSampler, BatchSampler, Dataset - - # init with dataset - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([784]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - bs = BatchSampler(dataset=RandomDataset(100), - shuffle=False, - batch_size=16, - drop_last=False) - - for batch_indices in bs: - print(batch_indices) - - # init with sampler - sampler = RandomSampler(RandomDataset(100)) - bs = BatchSampler(sampler=sampler, - batch_size=8, - drop_last=True) - - for batch_indices in bs: - print(batch_indices) - - - + >>> from paddle.io import RandomSampler, BatchSampler, Dataset + + >>> # init with dataset + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([784]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... + >>> bs = BatchSampler(dataset=RandomDataset(100), + ... shuffle=False, + ... batch_size=16, + ... drop_last=False) + ... + >>> for batch_indices in bs: + ... print(batch_indices) + ... + >>> # init with sampler + >>> sampler = RandomSampler(RandomDataset(100)) + >>> bs = BatchSampler(sampler=sampler, + ... batch_size=8, + ... drop_last=True) + ... + >>> for batch_indices in bs: + ... print(batch_indices) """ def __init__( @@ -203,29 +200,29 @@ class DistributedBatchSampler(BatchSampler): Examples: .. code-block:: python - import numpy as np - - from paddle.io import Dataset, DistributedBatchSampler - - # init with dataset - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([784]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - dataset = RandomDataset(100) - sampler = DistributedBatchSampler(dataset, batch_size=64) - - for data in sampler: - # do something - break + >>> import numpy as np + + >>> from paddle.io import Dataset, DistributedBatchSampler + + >>> # init with dataset + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([784]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... 
return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... + >>> dataset = RandomDataset(100) + >>> sampler = DistributedBatchSampler(dataset, batch_size=64) + + >>> for data in sampler: + ... # do something + ... break """ def __init__( @@ -339,27 +336,27 @@ def set_epoch(self, epoch): Examples: .. code-block:: python - import numpy as np - - from paddle.io import Dataset, DistributedBatchSampler - - # init with dataset - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([784]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - dataset = RandomDataset(100) - sampler = DistributedBatchSampler(dataset, batch_size=64) - - for epoch in range(10): - sampler.set_epoch(epoch) + >>> import numpy as np + + >>> from paddle.io import Dataset, DistributedBatchSampler + + >>> # init with dataset + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([784]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... + >>> dataset = RandomDataset(100) + >>> sampler = DistributedBatchSampler(dataset, batch_size=64) + + >>> for epoch in range(10): + ... sampler.set_epoch(epoch) """ self.epoch = epoch diff --git a/python/paddle/io/dataloader/dataset.py b/python/paddle/io/dataloader/dataset.py index 3e0458ae9b700..5253c647b1db3 100755 --- a/python/paddle/io/dataloader/dataset.py +++ b/python/paddle/io/dataloader/dataset.py @@ -37,26 +37,25 @@ class Dataset: .. code-block:: python - import numpy as np - from paddle.io import Dataset - - # define a random dataset - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([784]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - dataset = RandomDataset(10) - for i in range(len(dataset)): - print(dataset[i]) - + >>> import numpy as np + >>> from paddle.io import Dataset + + >>> # define a random dataset + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([784]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... + >>> dataset = RandomDataset(10) + >>> for i in range(len(dataset)): + ... print(dataset[i]) """ def __init__(self): @@ -95,23 +94,23 @@ class IterableDataset(Dataset): .. 
code-block:: python :name: code-example1 - import numpy as np - from paddle.io import IterableDataset - - # define a random dataset - class RandomDataset(IterableDataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __iter__(self): - for i in range(self.num_samples): - image = np.random.random([784]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - yield image, label - - dataset = RandomDataset(10) - for img, lbl in dataset: - print(img, lbl) + >>> import numpy as np + >>> from paddle.io import IterableDataset + + >>> # define a random dataset + >>> class RandomDataset(IterableDataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __iter__(self): + ... for i in range(self.num_samples): + ... image = np.random.random([784]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... yield image, label + ... + >>> dataset = RandomDataset(10) + >>> for img, lbl in dataset: + ... print(img, lbl) When :attr:`num_workers > 0`, each worker has a different copy of the dataset object and will yield whole dataset samples, which means samples in dataset will be repeated in @@ -125,87 +124,113 @@ def __iter__(self): .. code-block:: python :name: code-example2 - import math - import paddle - import numpy as np - from paddle.io import IterableDataset, DataLoader, get_worker_info - - class SplitedIterableDataset(IterableDataset): - def __init__(self, start, end): - self.start = start - self.end = end - - def __iter__(self): - worker_info = get_worker_info() - if worker_info is None: - iter_start = self.start - iter_end = self.end - else: - per_worker = int( - math.ceil((self.end - self.start) / float( - worker_info.num_workers))) - worker_id = worker_info.id - iter_start = self.start + worker_id * per_worker - iter_end = min(iter_start + per_worker, self.end) - - for i in range(iter_start, iter_end): - yield np.array([i]) - - dataset = SplitedIterableDataset(start=2, end=9) - dataloader = DataLoader( - dataset, - num_workers=2, - batch_size=1, - drop_last=True) - - for data in dataloader: - print(data) - # outputs: [2, 5, 3, 6, 4, 7] + >>> import math + >>> import paddle + >>> import numpy as np + >>> from paddle.io import IterableDataset, DataLoader, get_worker_info + + >>> class SplitedIterableDataset(IterableDataset): + ... def __init__(self, start, end): + ... self.start = start + ... self.end = end + ... + ... def __iter__(self): + ... worker_info = get_worker_info() + ... if worker_info is None: + ... iter_start = self.start + ... iter_end = self.end + ... else: + ... per_worker = int( + ... math.ceil((self.end - self.start) / float( + ... worker_info.num_workers))) + ... worker_id = worker_info.id + ... iter_start = self.start + worker_id * per_worker + ... iter_end = min(iter_start + per_worker, self.end) + ... + ... for i in range(iter_start, iter_end): + ... yield np.array([i]) + ... + >>> dataset = SplitedIterableDataset(start=2, end=9) + >>> dataloader = DataLoader( + ... dataset, + ... num_workers=2, + ... batch_size=1, + ... drop_last=True) + ... + >>> for data in dataloader: + ... 
print(data) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[2]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[3]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[4]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[5]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[6]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[7]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[8]]) splitting data copy in each worker by :code:`worker_init_fn` .. code-block:: python :name: code-example3 - import math - import paddle - import numpy as np - from paddle.io import IterableDataset, DataLoader, get_worker_info - - class RangeIterableDataset(IterableDataset): - def __init__(self, start, end): - self.start = start - self.end = end - - def __iter__(self): - for i in range(self.start, self.end): - yield np.array([i]) - - dataset = RangeIterableDataset(start=2, end=9) - - def worker_init_fn(worker_id): - worker_info = get_worker_info() - - dataset = worker_info.dataset - start = dataset.start - end = dataset.end - num_per_worker = int( - math.ceil((end - start) / float(worker_info.num_workers))) - - worker_id = worker_info.id - dataset.start = start + worker_id * num_per_worker - dataset.end = min(dataset.start + num_per_worker, end) - - dataloader = DataLoader( - dataset, - num_workers=2, - batch_size=1, - drop_last=True, - worker_init_fn=worker_init_fn) - - for data in dataloader: - print(data) - # outputs: [2, 5, 3, 6, 4, 7] + >>> import math + >>> import paddle + >>> import numpy as np + >>> from paddle.io import IterableDataset, DataLoader, get_worker_info + + >>> class RangeIterableDataset(IterableDataset): + ... def __init__(self, start, end): + ... self.start = start + ... self.end = end + ... + ... def __iter__(self): + ... for i in range(self.start, self.end): + ... yield np.array([i]) + ... + >>> dataset = RangeIterableDataset(start=2, end=9) + + >>> def worker_init_fn(worker_id): + ... worker_info = get_worker_info() + ... + ... dataset = worker_info.dataset + ... start = dataset.start + ... end = dataset.end + ... num_per_worker = int( + ... math.ceil((end - start) / float(worker_info.num_workers))) + ... + ... worker_id = worker_info.id + ... dataset.start = start + worker_id * num_per_worker + ... dataset.end = min(dataset.start + num_per_worker, end) + ... + >>> dataloader = DataLoader( + ... dataset, + ... num_workers=2, + ... batch_size=1, + ... drop_last=True, + ... worker_init_fn=worker_init_fn) + ... + >>> for data in dataloader: + ... print(data) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[2]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[3]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[4]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[5]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[6]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[7]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[8]]) """ @@ -249,22 +274,21 @@ class TensorDataset(Dataset): .. 
code-block:: python - import numpy as np - import paddle - from paddle.io import TensorDataset - + >>> import numpy as np + >>> import paddle + >>> from paddle.io import TensorDataset - input_np = np.random.random([2, 3, 4]).astype('float32') - input = paddle.to_tensor(input_np) - label_np = np.random.random([2, 1]).astype('int32') - label = paddle.to_tensor(label_np) - dataset = TensorDataset([input, label]) + >>> input_np = np.random.random([2, 3, 4]).astype('float32') + >>> input = paddle.to_tensor(input_np) + >>> label_np = np.random.random([2, 1]).astype('int32') + >>> label = paddle.to_tensor(label_np) - for i in range(len(dataset)): - input, label = dataset[i] - print(input, label) + >>> dataset = TensorDataset([input, label]) + >>> for i in range(len(dataset)): + ... input, label = dataset[i] + ... print(input, label) """ def __init__(self, tensors): @@ -309,32 +333,31 @@ class ComposeDataset(Dataset): .. code-block:: python - import numpy as np - import paddle - from paddle.io import Dataset, ComposeDataset - - - # define a random dataset - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([32]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - dataset = ComposeDataset([RandomDataset(10), RandomDataset(10)]) - for i in range(len(dataset)): - image1, label1, image2, label2 = dataset[i] - print(image1) - print(label1) - print(image2) - print(label2) - + >>> import numpy as np + >>> import paddle + >>> from paddle.io import Dataset, ComposeDataset + + + >>> # define a random dataset + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([32]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... + >>> dataset = ComposeDataset([RandomDataset(10), RandomDataset(10)]) + >>> for i in range(len(dataset)): + ... image1, label1, image2, label2 = dataset[i] + ... print(image1) + ... print(label1) + ... print(image2) + ... print(label2) """ def __init__(self, datasets): @@ -379,26 +402,25 @@ class ChainDataset(IterableDataset): .. code-block:: python - import numpy as np - import paddle - from paddle.io import IterableDataset, ChainDataset - - - # define a random dataset - class RandomDataset(IterableDataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __iter__(self): - for i in range(10): - image = np.random.random([32]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - yield image, label - - dataset = ChainDataset([RandomDataset(10), RandomDataset(10)]) - for image, label in iter(dataset): - print(image, label) - + >>> import numpy as np + >>> import paddle + >>> from paddle.io import IterableDataset, ChainDataset + + + >>> # define a random dataset + >>> class RandomDataset(IterableDataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __iter__(self): + ... for i in range(10): + ... image = np.random.random([32]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... yield image, label + ... + >>> dataset = ChainDataset([RandomDataset(10), RandomDataset(10)]) + >>> for image, label in iter(dataset): + ... 
print(image, label) """ def __init__(self, datasets): @@ -430,18 +452,18 @@ class Subset(Dataset): .. code-block:: python - import paddle - from paddle.io import Subset + >>> import paddle + >>> from paddle.io import Subset - # example 1: - a = paddle.io.Subset(dataset=range(1, 4), indices=[0, 2]) - print(list(a)) - # [1, 3] + >>> # example 1: + >>> a = paddle.io.Subset(dataset=range(1, 4), indices=[0, 2]) + >>> print(list(a)) + [1, 3] - # example 2: - b = paddle.io.Subset(dataset=range(1, 4), indices=[1, 1]) - print(list(b)) - # [2, 2] + >>> # example 2: + >>> b = paddle.io.Subset(dataset=range(1, 4), indices=[1, 1]) + >>> print(list(b)) + [2, 2] """ def __init__(self, dataset, indices): @@ -472,31 +494,34 @@ def random_split(dataset, lengths, generator=None): .. code-block:: python - import paddle - from paddle.io import random_split - - a_list = paddle.io.random_split(range(10), [3, 7]) - print(len(a_list)) - # 2 - - for idx, v in enumerate(a_list[0]): - print(idx, v) - - # output of the first subset - # 0 1 - # 1 3 - # 2 9 - - for idx, v in enumerate(a_list[1]): - print(idx, v) - # output of the second subset - # 0 5 - # 1 7 - # 2 8 - # 3 6 - # 4 0 - # 5 2 - # 6 4 + >>> import paddle + >>> from paddle.io import random_split + + >>> a_list = paddle.io.random_split(range(10), [3, 7]) + >>> print(len(a_list)) + 2 + + >>> # output of the first subset + >>> for idx, v in enumerate(a_list[0]): + ... print(idx, v) + >>> # doctest: +SKIP + 0 1 + 1 3 + 2 9 + >>> # doctest: -SKIP + + >>> # output of the second subset + >>> for idx, v in enumerate(a_list[1]): + ... print(idx, v) + >>> # doctest: +SKIP + 0 5 + 1 7 + 2 8 + 3 6 + 4 0 + 5 2 + 6 4 + >>> # doctest: -SKIP """ # Cannot verify that dataset is Sized if sum(lengths) != len(dataset): # type: ignore @@ -528,8 +553,12 @@ def _accumulate(iterable, fn=lambda x, y: x + y): .. code-block:: python - _accumulate([1,2,3,4,5]) --> 1 3 6 10 15 - _accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120 + >>> list(_accumulate([1, 2, 3, 4, 5])) + [1, 3, 6, 10, 15] + + >>> import operator + >>> list(_accumulate([1, 2, 3, 4, 5], operator.mul)) + [1, 2, 6, 24, 120] """ it = iter(iterable) diff --git a/python/paddle/io/dataloader/sampler.py b/python/paddle/io/dataloader/sampler.py index aa8a4e649c76c..e85f51510b704 100644 --- a/python/paddle/io/dataloader/sampler.py +++ b/python/paddle/io/dataloader/sampler.py @@ -44,34 +44,39 @@ class Sampler: .. code-block:: python - from paddle.io import Dataset, Sampler - - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([784]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - class MySampler(Sampler): - def __init__(self, data_source): - self.data_source = data_source - - def __iter__(self): - return iter(range(len(self.data_source))) - - def __len__(self): - return len(self.data_source) - - sampler = MySampler(data_source=RandomDataset(100)) - - for index in sampler: - print(index) + >>> from paddle.io import Dataset, Sampler + + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([784]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... 
+ >>> class MySampler(Sampler): + ... def __init__(self, data_source): + ... self.data_source = data_source + ... + ... def __iter__(self): + ... return iter(range(len(self.data_source))) + ... + ... def __len__(self): + ... return len(self.data_source) + ... + >>> sampler = MySampler(data_source=RandomDataset(100)) + + >>> for index in sampler: + ... print(index) + 0 + 1 + 2 + ... + 99 see `paddle.io.BatchSampler` see `paddle.io.DataLoader` @@ -105,24 +110,29 @@ class SequenceSampler(Sampler): .. code-block:: python - from paddle.io import Dataset, SequenceSampler - - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([784]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - sampler = SequenceSampler(data_source=RandomDataset(100)) - - for index in sampler: - print(index) + >>> from paddle.io import Dataset, SequenceSampler + + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([784]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... + >>> sampler = SequenceSampler(data_source=RandomDataset(100)) + + >>> for index in sampler: + ... print(index) + 0 + 1 + 2 + ... + 99 see `paddle.io.Sampler` """ @@ -160,25 +170,24 @@ class RandomSampler(Sampler): .. code-block:: python - from paddle.io import Dataset, RandomSampler - - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([784]).astype('float32') - label = np.random.randint(0, 9, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - sampler = RandomSampler(data_source=RandomDataset(100)) - - for index in sampler: - print(index) - + >>> from paddle.io import Dataset, RandomSampler + + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([784]).astype('float32') + ... label = np.random.randint(0, 9, (1, )).astype('int64') + ... return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... + >>> sampler = RandomSampler(data_source=RandomDataset(100)) + + >>> for index in sampler: + ... print(index) """ def __init__( @@ -288,14 +297,14 @@ class WeightedRandomSampler(Sampler): .. code-block:: python - from paddle.io import WeightedRandomSampler - - sampler = WeightedRandomSampler(weights=[0.1, 0.3, 0.5, 0.7, 0.2], - num_samples=5, - replacement=True) + >>> from paddle.io import WeightedRandomSampler - for index in sampler: - print(index) + >>> sampler = WeightedRandomSampler(weights=[0.1, 0.3, 0.5, 0.7, 0.2], + ... num_samples=5, + ... replacement=True) + ... + >>> for index in sampler: + ... print(index) """ def __init__(self, weights, num_samples, replacement=True): diff --git a/python/paddle/io/dataloader/worker.py b/python/paddle/io/dataloader/worker.py index 5eeeb849fc025..6c9bdf3de0d9a 100644 --- a/python/paddle/io/dataloader/worker.py +++ b/python/paddle/io/dataloader/worker.py @@ -94,51 +94,64 @@ def get_worker_info(): Returns: WorkerInfo: an instance of WorkerInfo which contains fields above. 
- .. note:: + Notes: For more usage and examples, please see :code:`paddle.io.IterableDataset` Example: .. code-block:: python - import math - import paddle - import numpy as np - from paddle.io import IterableDataset, DataLoader, get_worker_info - - class SplitedIterableDataset(IterableDataset): - def __init__(self, start, end): - self.start = start - self.end = end - - def __iter__(self): - worker_info = get_worker_info() - if worker_info is None: - iter_start = self.start - iter_end = self.end - else: - per_worker = int( - math.ceil((self.end - self.start) / float( - worker_info.num_workers))) - worker_id = worker_info.id - iter_start = self.start + worker_id * per_worker - iter_end = min(iter_start + per_worker, self.end) - - for i in range(iter_start, iter_end): - yield np.array([i]) - - place = paddle.CPUPlace() - dataset = SplitedIterableDataset(start=2, end=9) - dataloader = DataLoader( - dataset, - places=place, - num_workers=2, - batch_size=1, - drop_last=True) - - for data in dataloader: - print(data) - # outputs: [2, 5, 3, 6, 4, 7] + >>> import math + >>> import paddle + >>> import numpy as np + >>> from paddle.io import IterableDataset, DataLoader, get_worker_info + + >>> class SplitedIterableDataset(IterableDataset): + ... def __init__(self, start, end): + ... self.start = start + ... self.end = end + ... + ... def __iter__(self): + ... worker_info = get_worker_info() + ... if worker_info is None: + ... iter_start = self.start + ... iter_end = self.end + ... else: + ... per_worker = int( + ... math.ceil((self.end - self.start) / float( + ... worker_info.num_workers))) + ... worker_id = worker_info.id + ... iter_start = self.start + worker_id * per_worker + ... iter_end = min(iter_start + per_worker, self.end) + ... + ... for i in range(iter_start, iter_end): + ... yield np.array([i]) + ... + >>> place = paddle.CPUPlace() + >>> dataset = SplitedIterableDataset(start=2, end=9) + >>> dataloader = DataLoader( + ... dataset, + ... places=place, + ... num_workers=2, + ... batch_size=1, + ... drop_last=True) + ... + >>> for data in dataloader: + ... print(data) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[2]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[3]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[4]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[5]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[6]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[7]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[8]]) """ return _worker_info diff --git a/python/paddle/io/reader.py b/python/paddle/io/reader.py index 861d1253dcfb9..1ee61fa6f2f88 100644 --- a/python/paddle/io/reader.py +++ b/python/paddle/io/reader.py @@ -234,7 +234,7 @@ class DataLoader: For :code:`batch_sampler` please see :code:`paddle.io.BatchSampler` - .. note:: + Notes: GPU tensor operation is not supported in subprocess currently, please don't use GPU tensor operations in pipeline which will be performed in subprocess, such as dataset transforms, collte_fn, @@ -250,7 +250,7 @@ class DataLoader: :attr:`collate_fn` or :attr:`default_collate_fn`. - .. note:: + Notes: When automatic batching is disabled, :attr:`default_collate_fn` will do nothing to data from dataset. @@ -321,68 +321,66 @@ class DataLoader: .. 
code-block:: python - import numpy as np - - import paddle - import paddle.nn as nn - import paddle.nn.functional as F - from paddle.io import Dataset, BatchSampler, DataLoader - - BATCH_NUM = 20 - BATCH_SIZE = 16 - EPOCH_NUM = 4 - - IMAGE_SIZE = 784 - CLASS_NUM = 10 - - # define a random dataset - class RandomDataset(Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): - image = np.random.random([IMAGE_SIZE]).astype('float32') - label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') - return image, label - - def __len__(self): - return self.num_samples - - dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) - - class SimpleNet(nn.Layer): - def __init__(self): - super().__init__() - self.fc = nn.Linear(IMAGE_SIZE, CLASS_NUM) - - def forward(self, image, label=None): - return self.fc(image) - - simple_net = SimpleNet() - opt = paddle.optimizer.SGD(learning_rate=1e-3, - parameters=simple_net.parameters()) - - loader = DataLoader(dataset, - batch_size=BATCH_SIZE, - shuffle=True, - drop_last=True, - num_workers=2) - - for e in range(EPOCH_NUM): - for i, (image, label) in enumerate(loader()): - out = simple_net(image) - loss = F.cross_entropy(out, label) - avg_loss = paddle.mean(loss) - avg_loss.backward() - opt.minimize(avg_loss) - simple_net.clear_gradients() - print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy()))) - - - .. note:: + >>> import numpy as np + + >>> import paddle + >>> import paddle.nn as nn + >>> import paddle.nn.functional as F + >>> from paddle.io import Dataset, BatchSampler, DataLoader + + >>> BATCH_NUM = 20 + >>> BATCH_SIZE = 16 + >>> EPOCH_NUM = 4 + + >>> IMAGE_SIZE = 784 + >>> CLASS_NUM = 10 + + >>> # define a random dataset + >>> class RandomDataset(Dataset): + ... def __init__(self, num_samples): + ... self.num_samples = num_samples + ... + ... def __getitem__(self, idx): + ... image = np.random.random([IMAGE_SIZE]).astype('float32') + ... label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + ... return image, label + ... + ... def __len__(self): + ... return self.num_samples + ... + >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + + >>> class SimpleNet(nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.fc = nn.Linear(IMAGE_SIZE, CLASS_NUM) + ... + ... def forward(self, image, label=None): + ... return self.fc(image) + ... + >>> simple_net = SimpleNet() + >>> opt = paddle.optimizer.SGD(learning_rate=1e-3, + ... parameters=simple_net.parameters()) + ... + >>> loader = DataLoader(dataset, + ... batch_size=BATCH_SIZE, + ... shuffle=True, + ... drop_last=True, + ... num_workers=2) + ... + >>> for e in range(EPOCH_NUM): + ... for i, (image, label) in enumerate(loader()): + ... out = simple_net(image) + ... loss = F.cross_entropy(out, label) + ... avg_loss = paddle.mean(loss) + ... avg_loss.backward() + ... opt.minimize(avg_loss) + ... simple_net.clear_gradients() + ... 
print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy()))) + + Notes: For reading iterable dataset with multiprocess Dataloader, please see :code:`paddle.io.IterableDataset` - """ def __init__( From e8a6b31596152321e00a04c42b717f30d75e0ad3 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Thu, 27 Jul 2023 03:09:40 +0800 Subject: [PATCH 2/7] preview, test=docs_preview From fbce310a1977cc16d982ead7d4df1240579b81b7 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Tue, 1 Aug 2023 21:20:08 +0800 Subject: [PATCH 3/7] update example code, test=docs_preview --- python/paddle/io/dataloader/batch_sampler.py | 7 +++ python/paddle/io/dataloader/dataset.py | 48 ++++++++++---------- python/paddle/io/dataloader/sampler.py | 23 ++++++++-- 3 files changed, 49 insertions(+), 29 deletions(-) diff --git a/python/paddle/io/dataloader/batch_sampler.py b/python/paddle/io/dataloader/batch_sampler.py index b8349fd6d92ab..78c93151a390d 100644 --- a/python/paddle/io/dataloader/batch_sampler.py +++ b/python/paddle/io/dataloader/batch_sampler.py @@ -58,8 +58,10 @@ class BatchSampler(Sampler): .. code-block:: python + >>> import numpy as np >>> from paddle.io import RandomSampler, BatchSampler, Dataset + >>> np.random.seed(2023) >>> # init with dataset >>> class RandomDataset(Dataset): ... def __init__(self, num_samples): @@ -80,7 +82,9 @@ class BatchSampler(Sampler): ... >>> for batch_indices in bs: ... print(batch_indices) + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] ... + [96, 97, 98, 99] >>> # init with sampler >>> sampler = RandomSampler(RandomDataset(100)) >>> bs = BatchSampler(sampler=sampler, @@ -89,6 +93,9 @@ class BatchSampler(Sampler): ... >>> for batch_indices in bs: ... print(batch_indices) + [56, 12, 68, 0, 82, 66, 91, 44] + ... + [53, 17, 22, 86, 52, 3, 92, 33] """ def __init__( diff --git a/python/paddle/io/dataloader/dataset.py b/python/paddle/io/dataloader/dataset.py index 5253c647b1db3..16a5f0379523f 100755 --- a/python/paddle/io/dataloader/dataset.py +++ b/python/paddle/io/dataloader/dataset.py @@ -55,7 +55,8 @@ class Dataset: ... >>> dataset = RandomDataset(10) >>> for i in range(len(dataset)): - ... print(dataset[i]) + ... image, label = dataset[i] + ... # do something """ def __init__(self): @@ -109,8 +110,9 @@ class IterableDataset(Dataset): ... yield image, label ... >>> dataset = RandomDataset(10) - >>> for img, lbl in dataset: - ... print(img, lbl) + >>> for img, label in dataset: + ... # do something + ... ... When :attr:`num_workers > 0`, each worker has a different copy of the dataset object and will yield whole dataset samples, which means samples in dataset will be repeated in @@ -158,7 +160,7 @@ class IterableDataset(Dataset): ... drop_last=True) ... >>> for data in dataloader: - ... print(data) + ... print(data) # doctest: +SKIP Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[2]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, @@ -216,7 +218,7 @@ class IterableDataset(Dataset): ... worker_init_fn=worker_init_fn) ... >>> for data in dataloader: - ... print(data) + ... print(data) # doctest: +SKIP Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[2]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, @@ -288,7 +290,7 @@ class TensorDataset(Dataset): >>> for i in range(len(dataset)): ... input, label = dataset[i] - ... print(input, label) + ... 
# do something """ def __init__(self, tensors): @@ -354,10 +356,7 @@ class ComposeDataset(Dataset): >>> dataset = ComposeDataset([RandomDataset(10), RandomDataset(10)]) >>> for i in range(len(dataset)): ... image1, label1, image2, label2 = dataset[i] - ... print(image1) - ... print(label1) - ... print(image2) - ... print(label2) + ... # do something """ def __init__(self, datasets): @@ -420,7 +419,9 @@ class ChainDataset(IterableDataset): ... >>> dataset = ChainDataset([RandomDataset(10), RandomDataset(10)]) >>> for image, label in iter(dataset): - ... print(image, label) + ... # do something + ... ... + """ def __init__(self, datasets): @@ -497,6 +498,7 @@ def random_split(dataset, lengths, generator=None): >>> import paddle >>> from paddle.io import random_split + >>> paddle.seed(2023) >>> a_list = paddle.io.random_split(range(10), [3, 7]) >>> print(len(a_list)) 2 @@ -504,24 +506,20 @@ def random_split(dataset, lengths, generator=None): >>> # output of the first subset >>> for idx, v in enumerate(a_list[0]): ... print(idx, v) - >>> # doctest: +SKIP - 0 1 - 1 3 - 2 9 - >>> # doctest: -SKIP + 0 8 + 1 2 + 2 5 >>> # output of the second subset >>> for idx, v in enumerate(a_list[1]): ... print(idx, v) - >>> # doctest: +SKIP - 0 5 - 1 7 - 2 8 - 3 6 - 4 0 - 5 2 - 6 4 - >>> # doctest: -SKIP + 0 9 + 1 6 + 2 3 + 3 4 + 4 1 + 5 0 + 6 7 """ # Cannot verify that dataset is Sized if sum(lengths) != len(dataset): # type: ignore diff --git a/python/paddle/io/dataloader/sampler.py b/python/paddle/io/dataloader/sampler.py index e85f51510b704..d26316ecc0eb7 100644 --- a/python/paddle/io/dataloader/sampler.py +++ b/python/paddle/io/dataloader/sampler.py @@ -170,8 +170,10 @@ class RandomSampler(Sampler): .. code-block:: python + >>> import numpy as np >>> from paddle.io import Dataset, RandomSampler + >>> np.random.seed(2023) >>> class RandomDataset(Dataset): ... def __init__(self, num_samples): ... self.num_samples = num_samples @@ -188,6 +190,11 @@ class RandomSampler(Sampler): >>> for index in sampler: ... print(index) + 56 + 12 + 68 + ... + 87 """ def __init__( @@ -297,14 +304,22 @@ class WeightedRandomSampler(Sampler): .. code-block:: python + >>> import numpy as np >>> from paddle.io import WeightedRandomSampler - >>> sampler = WeightedRandomSampler(weights=[0.1, 0.3, 0.5, 0.7, 0.2], - ... num_samples=5, - ... replacement=True) - ... + >>> np.random.seed(2023) + >>> sampler = WeightedRandomSampler( + ... weights=[0.1, 0.3, 0.5, 0.7, 0.2], + ... num_samples=5, + ... replacement=True + ... ) >>> for index in sampler: ... print(index) + 2 + 4 + 3 + 1 + 1 """ def __init__(self, weights, num_samples, replacement=True): From 71c80e8feadcb7e5d021a24fa7a8462dbe386cc3 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Wed, 2 Aug 2023 09:57:03 +0800 Subject: [PATCH 4/7] update output, test=docs_preview --- python/paddle/io/dataloader/dataset.py | 18 +++++++++--------- python/paddle/io/dataloader/worker.py | 14 ++++++-------- python/paddle/io/multiprocess_utils.py | 2 -- python/paddle/io/reader.py | 2 -- 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/python/paddle/io/dataloader/dataset.py b/python/paddle/io/dataloader/dataset.py index 16a5f0379523f..a01da3292e37e 100755 --- a/python/paddle/io/dataloader/dataset.py +++ b/python/paddle/io/dataloader/dataset.py @@ -506,20 +506,20 @@ def random_split(dataset, lengths, generator=None): >>> # output of the first subset >>> for idx, v in enumerate(a_list[0]): ... 
print(idx, v) - 0 8 - 1 2 + 0 7 + 1 6 2 5 >>> # output of the second subset >>> for idx, v in enumerate(a_list[1]): ... print(idx, v) - 0 9 - 1 6 - 2 3 - 3 4 - 4 1 - 5 0 - 6 7 + 0 1 + 1 9 + 2 4 + 3 2 + 4 0 + 5 3 + 6 8 """ # Cannot verify that dataset is Sized if sum(lengths) != len(dataset): # type: ignore diff --git a/python/paddle/io/dataloader/worker.py b/python/paddle/io/dataloader/worker.py index 6c9bdf3de0d9a..6e214dbf63102 100644 --- a/python/paddle/io/dataloader/worker.py +++ b/python/paddle/io/dataloader/worker.py @@ -13,8 +13,6 @@ # limitations under the License. import os - -# NOTE: queue has a different name in python2 and python3 import queue import sys import traceback @@ -141,17 +139,17 @@ def get_worker_info(): Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[2]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, - [[3]]) - Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, - [[4]]) - Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, - [[5]]) - Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[6]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[3]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[7]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[4]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[8]]) + Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, + [[5]]) """ return _worker_info diff --git a/python/paddle/io/multiprocess_utils.py b/python/paddle/io/multiprocess_utils.py index 51b0c2b818214..c57b6dae86b5e 100644 --- a/python/paddle/io/multiprocess_utils.py +++ b/python/paddle/io/multiprocess_utils.py @@ -13,8 +13,6 @@ # limitations under the License. import atexit - -# NOTE: queue has a different name in python2 and python3 import queue import signal import sys diff --git a/python/paddle/io/reader.py b/python/paddle/io/reader.py index 1ee61fa6f2f88..d8db6cc2ab012 100644 --- a/python/paddle/io/reader.py +++ b/python/paddle/io/reader.py @@ -14,8 +14,6 @@ import copy import multiprocessing - -# NOTE: queue has a different name in python2 and python3 import sys import time import warnings From 90343f5bef35732ea9a6403596a8b64eff457c44 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Wed, 2 Aug 2023 09:58:55 +0800 Subject: [PATCH 5/7] remove unused imports, test=docs_preview --- python/paddle/io/dataloader/dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/io/dataloader/dataset.py b/python/paddle/io/dataloader/dataset.py index a01da3292e37e..cf4f8981bef91 100755 --- a/python/paddle/io/dataloader/dataset.py +++ b/python/paddle/io/dataloader/dataset.py @@ -496,7 +496,6 @@ def random_split(dataset, lengths, generator=None): .. 
code-block:: python >>> import paddle - >>> from paddle.io import random_split >>> paddle.seed(2023) >>> a_list = paddle.io.random_split(range(10), [3, 7]) From 8619edd3aaea9da36be250259598aacc53bd8421 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Wed, 2 Aug 2023 11:28:24 +0800 Subject: [PATCH 6/7] skip some device depends apis, test=docs_preview --- python/paddle/io/dataloader/dataset.py | 4 ++-- python/paddle/io/dataloader/worker.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/paddle/io/dataloader/dataset.py b/python/paddle/io/dataloader/dataset.py index cf4f8981bef91..4790ad5f3fd1e 100755 --- a/python/paddle/io/dataloader/dataset.py +++ b/python/paddle/io/dataloader/dataset.py @@ -504,14 +504,14 @@ def random_split(dataset, lengths, generator=None): >>> # output of the first subset >>> for idx, v in enumerate(a_list[0]): - ... print(idx, v) + ... print(idx, v) # doctest: +skip 0 7 1 6 2 5 >>> # output of the second subset >>> for idx, v in enumerate(a_list[1]): - ... print(idx, v) + ... print(idx, v) # doctest: +skip 0 1 1 9 2 4 diff --git a/python/paddle/io/dataloader/worker.py b/python/paddle/io/dataloader/worker.py index 6e214dbf63102..cd9ca6e081692 100644 --- a/python/paddle/io/dataloader/worker.py +++ b/python/paddle/io/dataloader/worker.py @@ -135,7 +135,7 @@ def get_worker_info(): ... drop_last=True) ... >>> for data in dataloader: - ... print(data) + ... print(data) # doctest: +SKIP Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[2]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, From a89232c32cbd3e3fab559cdbb2f8ef8de93d9ac0 Mon Sep 17 00:00:00 2001 From: SigureMo Date: Wed, 2 Aug 2023 11:32:05 +0800 Subject: [PATCH 7/7] add skip reason, test=docs_preview --- python/paddle/io/dataloader/dataset.py | 8 ++++---- python/paddle/io/dataloader/worker.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/paddle/io/dataloader/dataset.py b/python/paddle/io/dataloader/dataset.py index 4790ad5f3fd1e..4daf410a31836 100755 --- a/python/paddle/io/dataloader/dataset.py +++ b/python/paddle/io/dataloader/dataset.py @@ -160,7 +160,7 @@ class IterableDataset(Dataset): ... drop_last=True) ... >>> for data in dataloader: - ... print(data) # doctest: +SKIP + ... print(data) # doctest: +SKIP("The output depends on the environment.") Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[2]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, @@ -218,7 +218,7 @@ class IterableDataset(Dataset): ... worker_init_fn=worker_init_fn) ... >>> for data in dataloader: - ... print(data) # doctest: +SKIP + ... print(data) # doctest: +SKIP("The output depends on the environment.") Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[2]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, @@ -504,14 +504,14 @@ def random_split(dataset, lengths, generator=None): >>> # output of the first subset >>> for idx, v in enumerate(a_list[0]): - ... print(idx, v) # doctest: +skip + ... print(idx, v) # doctest: +SKIP("The output depends on the environment.") 0 7 1 6 2 5 >>> # output of the second subset >>> for idx, v in enumerate(a_list[1]): - ... print(idx, v) # doctest: +skip + ... 
print(idx, v) # doctest: +SKIP("The output depends on the environment.") 0 1 1 9 2 4 diff --git a/python/paddle/io/dataloader/worker.py b/python/paddle/io/dataloader/worker.py index cd9ca6e081692..4a1667483da64 100644 --- a/python/paddle/io/dataloader/worker.py +++ b/python/paddle/io/dataloader/worker.py @@ -135,7 +135,7 @@ def get_worker_info(): ... drop_last=True) ... >>> for data in dataloader: - ... print(data) # doctest: +SKIP + ... print(data) # doctest: +SKIP("The output depends on the environment.") Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True, [[2]]) Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True,
        [[2]])
        Tensor(shape=[1, 1], dtype=int64, place=Place(cpu), stop_gradient=True,