You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

projector_reader.py 3.9 kB

4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. # -*- coding: UTF-8 -*-
  2. """
  3. Copyright 2020 Tianshu AI Platform. All Rights Reserved.
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License.
  13. =============================================================
  14. """
  15. from oneflow.customized.utils import projector_pb2
  16. from collections.abc import Iterable
  17. from typing import BinaryIO
  18. from typing import Optional
  19. import numpy as np
  20. from collections import namedtuple
  21. ProjectorSummaryItem = namedtuple(
  22. "ProjectorSummaryItem", ["metadata", "sample", "projectors"]
  23. )
  24. Metadata = namedtuple("Metadata", ["type", "content"])
  25. # SampleItem = namedtuple("SampleItem", ["name", "type", "X"])
  26. ProjectorItem = namedtuple(
  27. 'ProjectorItem', ['tag', 'step', 'wall_time', 'value', 'label']
  28. )
  29. def _decode_tensor(tensor):
  30. _tensor_shape = tuple([i.size for i in tensor.shape.dim])
  31. _decoded = np.frombuffer(tensor.content, dtype=tensor.dtype)
  32. _decoded = _decoded.reshape(_tensor_shape)
  33. return _decoded
  34. class Projector_Reader:
  35. def __init__(
  36. self,
  37. fileblock: BinaryIO,
  38. tag_filter: Optional[Iterable] = None,
  39. stop_on_error: bool = False
  40. ):
  41. """
  42. Initalize new projector reader
  43. :param fileblock: A File IO for projector data
  44. :param tag_filter: A list of tags to leave (`None` for all)
  45. :param stop_on_error: Whether stop on a broken file(TODO)
  46. """
  47. self._fileblock = fileblock
  48. self._tag_filter = set(tag_filter) if tag_filter is not None else None
  49. self._stop_on_error = stop_on_error
  50. self._TYPES = {
  51. 0: "embedding",
  52. 1: "exception"
  53. }
  54. self._DATASETTYPES = {
  55. 0: "image",
  56. 1: "audio",
  57. 2: "text"
  58. }
  59. def _decode_sample(self, dataset):
  60. _name = dataset.name
  61. _type = self._DATASETTYPES[dataset.type]
  62. _X = _decode_tensor(dataset.X)
  63. return dict(name=_name, type=_type, X=_X)
  64. def _decode_metadata(self, metadata):
  65. return Metadata(
  66. type=self._TYPES[metadata.type],
  67. content=metadata.content
  68. )
  69. def _decode_projector(self, projector: Iterable) -> Optional[ProjectorItem]:
  70. for pro in projector:
  71. _value = _decode_tensor(pro.value)
  72. _label = _decode_tensor(pro.label) if pro.HasField('label') else None
  73. yield ProjectorItem(
  74. tag=pro.tag,
  75. step=pro.step,
  76. wall_time=pro.WALL_TIME,
  77. value=_value,
  78. label=_label
  79. )
  80. def _check_tag(self, tag: str) -> bool:
  81. """
  82. Check if a tag matches the current tag filter
  83. :param tag: A string with tag
  84. :return: A boolean value.
  85. """
  86. return self._tag_filter is None or tag in self._tag_filter
  87. def read(self) -> ProjectorSummaryItem:
  88. summary = projector_pb2.SummaryProjector()
  89. summary.ParseFromString(self._fileblock.read())
  90. psi = ProjectorSummaryItem(
  91. metadata=self._decode_metadata(summary.metadata),
  92. sample=self._decode_sample(summary.sample)
  93. if summary.HasField("sample") else None,
  94. projectors=[item for item in self._decode_projector(summary.projector)
  95. if item is not None and all([self._check_tag(item.tag)])]
  96. )
  97. return psi

一站式算法开发平台、高性能分布式深度学习框架、先进算法模型库、视觉模型炼知平台、数据可视化分析平台等一系列平台及工具,在模型高效分布式训练、数据处理和可视分析、模型炼知和轻量化等技术上形成独特优势,目前已在产学研等各领域近千家单位及个人提供AI应用赋能