You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

engine.py 6.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. """
  2. Copyright 2020 Tianshu AI Platform. All Rights Reserved.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. =============================================================
  13. """
  14. import torch
  15. import torch.nn as nn
  16. import abc, math, weakref, typing, time
  17. from typing import Any, Callable, Optional, Sequence
  18. import numpy as np
  19. from kamal.core.engine.events import DefaultEvents, Event
  20. from kamal.core import tasks
  21. from kamal.utils import set_mode, move_to_device, get_logger
  22. from collections import defaultdict
  23. import numbers
  24. import contextlib
  25. class State(object):
  26. def __init__(self):
  27. self.iter = 0
  28. self.max_iter = None
  29. self.epoch_length = None
  30. self.dataloader = None
  31. self.seed = None
  32. self.metrics=dict()
  33. self.batch=None
  34. @property
  35. def current_epoch(self):
  36. if self.epoch_length is not None:
  37. return self.iter // self.epoch_length
  38. return None
  39. @property
  40. def max_epoch(self):
  41. if self.epoch_length is not None:
  42. return self.max_iter // self.epoch_length
  43. return None
  44. @property
  45. def current_batch_index(self):
  46. if self.epoch_length is not None:
  47. return self.iter % self.epoch_length
  48. return None
  49. @property
  50. def max_batch_index(self):
  51. return self.epoch_length
  52. def __repr__(self):
  53. rep = "State:\n"
  54. for attr, value in self.__dict__.items():
  55. if not isinstance(value, (numbers.Number, str, dict)):
  56. value = type(value)
  57. rep += "\t{}: {}\n".format(attr, value)
  58. return rep
  59. class Engine(abc.ABC):
  60. def __init__(self, logger=None, tb_writer=None):
  61. self._logger = logger if logger else get_logger(name='kamal', color=True)
  62. self._tb_writer = tb_writer
  63. self._callbacks = defaultdict(list)
  64. self._allowed_events = [ *DefaultEvents ]
  65. self._state = State()
  66. def reset(self):
  67. self._state = State()
  68. def run(self, step_fn: Callable, dataloader, max_iter, start_iter=0, epoch_length=None):
  69. self.state.iter = self._state.start_iter = start_iter
  70. self.state.max_iter = max_iter
  71. self.state.epoch_length = epoch_length if epoch_length else len(dataloader)
  72. self.state.dataloader = dataloader
  73. self.state.dataloader_iter = iter(dataloader)
  74. self.state.step_fn = step_fn
  75. self.trigger_events(DefaultEvents.BEFORE_RUN)
  76. for self.state.iter in range( start_iter, max_iter ):
  77. if self.state.epoch_length!=None and \
  78. self.state.iter%self.state.epoch_length==0: # Epoch Start
  79. self.trigger_events(DefaultEvents.BEFORE_EPOCH)
  80. self.trigger_events(DefaultEvents.BEFORE_STEP)
  81. self.state.batch = self._get_batch()
  82. step_output = step_fn(self, self.state.batch)
  83. if isinstance(step_output, dict):
  84. self.state.metrics.update(step_output)
  85. self.trigger_events(DefaultEvents.AFTER_STEP)
  86. if self.state.epoch_length!=None and \
  87. (self.state.iter+1)%self.state.epoch_length==0: # Epoch End
  88. self.trigger_events(DefaultEvents.AFTER_EPOCH)
  89. self.trigger_events(DefaultEvents.AFTER_RUN)
  90. def _get_batch(self):
  91. try:
  92. batch = next( self.state.dataloader_iter )
  93. except StopIteration:
  94. self.state.dataloader_iter = iter(self.state.dataloader) # reset iterator
  95. batch = next( self.state.dataloader_iter )
  96. if not isinstance(batch, (list, tuple)):
  97. batch = [ batch, ] # no targets
  98. return batch
  99. @property
  100. def state(self):
  101. return self._state
  102. @property
  103. def logger(self):
  104. return self._logger
  105. @property
  106. def tb_writer(self):
  107. return self._tb_writer
  108. def add_callback(self, event: Event, callbacks ):
  109. if not isinstance(callbacks, Sequence):
  110. callbacks = [callbacks]
  111. if event in self._allowed_events:
  112. for callback in callbacks:
  113. if callback not in self._callbacks[event]:
  114. if event.trigger!=event.default_trigger:
  115. callback = self._trigger_wrapper(self, event.trigger, callback )
  116. self._callbacks[event].append( callback )
  117. callbacks = [ RemovableCallback(self, event, c) for c in callbacks ]
  118. return ( callbacks[0] if len(callbacks)==1 else callbacks )
  119. def remove_callback(self, event, callback):
  120. for c in self._callbacks[event]:
  121. if c==callback:
  122. self._callbacks.remove( callback )
  123. return True
  124. return False
  125. @staticmethod
  126. def _trigger_wrapper(engine, trigger, callback):
  127. def wrapper(*args, **kwargs) -> Any:
  128. if trigger(engine):
  129. return callback(engine)
  130. return wrapper
  131. def trigger_events(self, *events):
  132. for e in events:
  133. if e in self._allowed_events:
  134. for callback in self._callbacks[e]:
  135. callback(self)
  136. def register_events(self, *events):
  137. for e in events:
  138. if e not in self._allowed_events:
  139. self._allowed_events.apped( e )
  140. @contextlib.contextmanager
  141. def save_current_callbacks(self):
  142. temp = self._callbacks
  143. self._callbacks = defaultdict(list)
  144. yield
  145. self._callbacks = temp
  146. class RemovableCallback:
  147. def __init__(self, engine, event, callback):
  148. self._engine = weakref.ref(engine)
  149. self._callback = weakref.ref(callback)
  150. self._event = weakref.ref(event)
  151. @property
  152. def callback(self):
  153. return self._callback()
  154. def remove(self):
  155. engine = self._engine()
  156. callback = self._callback()
  157. event = self._event()
  158. return engine.remove_callback(event, callback)

一站式算法开发平台、高性能分布式深度学习框架、先进算法模型库、视觉模型炼知平台、数据可视化分析平台等一系列平台及工具,在模型高效分布式训练、数据处理和可视分析、模型炼知和轻量化等技术上形成独特优势,目前已在产学研等各领域近千家单位及个人提供AI应用赋能

Contributors (1)