# Only proceed when a real monitor value has been recorded.
if abs(self.monitor_value) != float('inf'):  # A value of +/-inf means the monitor has never been run.
# If running distributed and an exception was encountered, do not load, to avoid barrier problems.
if not (trainer.driver.is_distributed() and self.encounter_exception):
# A truthy real_save_folder selects the "from <folder>" messages below.
if self.real_save_folder:
# NOTE(review): the next two calls log the same event; the second additionally reports
# self.meta['epoch'] and self.meta['batch']. They look like old/new variants of a single
# line -- confirm that only one is intended.
logger.info(f"Loading best model from {self.real_save_folder} with {self._real_monitor}: {self.monitor_value}...")
logger.info(f"Loading best model from {self.real_save_folder} with {self._real_monitor}: {self.monitor_value} (achieved in Epoch:{self.meta['epoch']}, Global Batch:{self.meta['batch']})...")
# NOTE(review): no `else:` is visible here; presumably the "from buffer" messages belong to
# the branch taken when real_save_folder is falsy -- confirm against the full file.
# The same old/new duplication applies to these two calls.
logger.info(f"Loading best model from buffer with {self._real_monitor}: {self.monitor_value}...")
logger.info(f"Loading best model from buffer with {self._real_monitor}: {self.monitor_value} (achieved in Epoch:{self.meta['epoch']}, Global Batch:{self.meta['batch']})...")