【python实现】实时监测GPU,空闲时自动执行脚本

import os
import sys
import time

# Shell command launched once the monitored GPU is considered idle.
cmd = 'nohup python -u train_post_2d_aut.py > output1.log &'

# Zero-based position of the monitored GPU among the nvidia-smi status rows.
GPU_INDEX = 2
# Maximum board power (W) and total memory (MiB) used to compute free headroom.
POWER_MAX_W = 250
MEMORY_MAX_MIB = 12288


def _reading_before_slash(cell, unit):
    """Return the integer that precedes ``unit`` in a 'used / total' cell."""
    tokens = cell.split('/')[0].split()
    if not tokens:
        raise ValueError('no reading found in nvidia-smi cell: %r' % cell)
    # The last token before the slash is the current reading, e.g. '9W'.
    return int(tokens[-1].split(unit)[0])


def parse_gpu_status(smi_output, gpu_index=GPU_INDEX):
    """Extract one GPU's power draw and memory usage from nvidia-smi text.

    Args:
        smi_output: Text printed by ``nvidia-smi`` (raw, or pre-filtered with
            ``grep %``). Only lines containing ``%`` are treated as GPU status
            rows; each is expected to look like
            ``| 23%  36C  P8  9W / 250W |  0MiB / 12288MiB |  0%  Default |``.
        gpu_index: Zero-based index of the status row to read.

    Returns:
        A ``(power_used_w, memory_used_mib)`` tuple of ints.

    Raises:
        IndexError: If there is no status row for ``gpu_index``.
        ValueError: If the row lacks a numeric power or memory reading.
    """
    rows = [line for line in smi_output.splitlines() if '%' in line]
    if not 0 <= gpu_index < len(rows):
        raise IndexError(
            'no nvidia-smi status row for GPU %d (found %d rows)'
            % (gpu_index, len(rows))
        )
    cells = rows[gpu_index].split('|')
    if len(cells) < 3:
        raise ValueError('unexpected nvidia-smi row: %r' % rows[gpu_index])
    # cells[0] is the text before the first '|'; the power cell comes next,
    # followed by the memory cell.
    power_used = _reading_before_slash(cells[1], 'W')
    memory_used = _reading_before_slash(cells[2], 'M')
    return power_used, memory_used


def gpu_info(gpu_index=GPU_INDEX):
    """Query nvidia-smi and return the free power and memory of one GPU.

    Args:
        gpu_index: Zero-based index of the GPU status row to inspect.

    Returns:
        A ``(power_free, memory_free)`` tuple: ``POWER_MAX_W`` minus the
        current draw in W, and ``MEMORY_MAX_MIB`` minus the used memory in MiB.

    Raises:
        IndexError: If nvidia-smi printed no status row for ``gpu_index``
            (including when the command produced no output).
        ValueError: If the status row could not be parsed.
    """
    # Close the pipe deterministically instead of leaking it on every poll.
    with os.popen('nvidia-smi') as pipe:
        smi_output = pipe.read()
    gpu_power, gpu_memory = parse_gpu_status(smi_output, gpu_index)
    return POWER_MAX_W - gpu_power, MEMORY_MAX_MIB - gpu_memory


def narrow_setup(interval=2, memory_threshold=7000, power_threshold=175):
    """Poll the GPU until it is idle, then run ``cmd`` through the shell.

    The GPU counts as idle when free memory exceeds ``memory_threshold`` MiB
    and free power exceeds ``power_threshold`` W. While waiting, a one-line
    status with a small progress indicator is rewritten in place on stdout.

    Args:
        interval: Seconds to sleep between polls.
        memory_threshold: Free memory (MiB) that must be exceeded.
        power_threshold: Free power (W) that must be exceeded.
    """
    i = 0
    power_free, memory_free = gpu_info()
    while not (memory_free > memory_threshold and power_free > power_threshold):
        step = i % 10
        symbol = 'monitoring: ' + '>' * step + ' ' * (10 - step - 1) + '|'
        gpu_power_str = 'gpu power_free:%d W |' % power_free
        gpu_memory_str = 'gpu memory_free:%d MiB |' % memory_free
        # '\r' returns to the line start so the status overwrites itself.
        sys.stdout.write('\r' + gpu_memory_str + ' ' + gpu_power_str + ' ' + symbol)
        sys.stdout.flush()
        time.sleep(interval)
        i += 1
        # Re-read after sleeping so the launch decision uses a fresh sample.
        power_free, memory_free = gpu_info()
    print('\n' + cmd)
    os.system(cmd)


if __name__ == '__main__':
    narrow_setup()
上一篇:粘性代理 vs 轮换代理: 特点、优势与选择指南


下一篇:Java基础——多线程(一)