# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mindspore.dataset as ds
import mindspore.dataset.transforms.vision.c_transforms as CV
import mindspore.dataset.transforms.c_transforms as C
from mindspore.dataset.transforms.vision import Inter
import mindspore.common.dtype as mstype


def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1,
                           num_parallel_workers=1, sparse=True):
    """
    create dataset for training or testing
    """
    # Source the raw MNIST samples from the given directory.
    mnist_ds = ds.MnistDataset(data_path)

    # Transform parameters: target spatial size and [0, 1] rescaling.
    target_h, target_w = 32, 32
    scale_factor = 1.0 / 255.0
    shift_value = 0.0

    # Per-sample image transforms.
    resize_op = CV.Resize((target_h, target_w),
                          interpolation=Inter.LINEAR)
    rescale_op = CV.Rescale(scale_factor, shift_value)
    hwc2chw_op = CV.HWC2CHW()
    one_hot_op = C.OneHot(10)
    label_cast_op = C.TypeCast(mstype.int32)

    # Label pipeline: dense (non-sparse) labels are one-hot encoded
    # and cast to float32; sparse labels are cast to int32.
    if not sparse:
        mnist_ds = mnist_ds.map(input_columns="label",
                                operations=one_hot_op,
                                num_parallel_workers=num_parallel_workers)
        label_cast_op = C.TypeCast(mstype.float32)
    mnist_ds = mnist_ds.map(input_columns="label",
                            operations=label_cast_op,
                            num_parallel_workers=num_parallel_workers)

    # Image pipeline, applied in order: resize -> rescale -> HWC-to-CHW.
    for image_op in (resize_op, rescale_op, hwc2chw_op):
        mnist_ds = mnist_ds.map(input_columns="image",
                                operations=image_op,
                                num_parallel_workers=num_parallel_workers)

    # Dataset-level ops: shuffle, batch (dropping the incomplete tail
    # batch), then repeat for multiple epochs.
    mnist_ds = mnist_ds.shuffle(buffer_size=10000)
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds