卡GPU显存小程序

使用之前只需要简单地设置两个变量即可:

  1. Memory_Allocated_GB = 5  # 需要占用的显存大小,单位 GiB
  2. GPU_ID = 2  # 需要卡显存的GPU ID
import torch
from torch import cuda
import time
import psutil


def get_gpu_used_information():
    """Collect per-process GPU memory usage for every visible CUDA device.

    The text report of ``cuda.list_gpu_processes`` is parsed line by line and
    the owner of each reported process is looked up through ``psutil``.

    Returns:
        list[tuple[int, str, str, str]]: One
        ``(gpu_id, pid, memory, user_name)`` tuple per reported process.
        ``pid`` and ``memory`` are decimal strings; ``memory`` is the figure
        printed by ``list_gpu_processes`` with its fractional part dropped.
        ``user_name`` is ``'unknown'`` when the owner cannot be determined.
    """
    used_information = []
    for gpu_id in range(cuda.device_count()):
        for line in cuda.list_gpu_processes(gpu_id).splitlines():
            if not line.startswith('process'):
                continue
            fields = line.split()
            # Expected layout (as emitted by torch at the time of writing):
            #   process <pid> uses <memory> MB GPU memory
            # Skip anything shorter instead of failing with IndexError.
            if len(fields) < 4:
                continue
            pid = fields[1]
            # Keep only the integer part; unlike a fixed-width slice this
            # does not depend on the number of decimals printed.
            memory = fields[3].partition('.')[0]
            try:
                owner = psutil.Process(int(pid)).username()
            except (psutil.Error, ValueError):
                # The process may have exited, be inaccessible to this user,
                # or the pid token may not be numeric.
                owner = 'unknown'
            # Windows reports "DOMAIN\\user"; other platforms report a bare
            # name with no backslash. Taking the last component handles both
            # (indexing [1] raised IndexError for bare names).
            used_information.append((gpu_id, pid, memory, owner.split('\\')[-1]))

    return used_information


def print_information(infs):
    """Print a green, boxed table of GPU process usage to stdout.

    Args:
        infs: Iterable of records indexed as
            ``(gpu_id, pid, memory, user_name)``. ``gpu_id``, ``pid`` and
            ``memory`` are converted with ``int()``, so ints and decimal
            strings are both accepted. An empty iterable prints only the
            header and borders.
    """
    # SGR parameters are separated by ';'. The previous ':' separator is
    # reserved for sub-parameters and is not rendered as a colour by many
    # terminals.
    green = '\033[0;32m'
    reset = '\033[0m'
    # 58 dashes + 2 corners = 60 columns, matching the header and data rows.
    border = green + '+' + '-' * 58 + '+' + reset

    print(border)
    print(green + '|     GPU       PID       MEMORY-USED       USER-NAME      |' + reset)
    for inf in infs:
        record = "{:>8d} {:>9d} {:>14d}MiB {}".format(
            int(inf[0]), int(inf[1]), int(inf[2]), str(inf[3]).rjust(15))
        print(green + '|' + record + ' ' * 6 + '|' + reset)
    print(border)


def hold_gpu_memory(gpu_id, memory_gib, retry_seconds=10):
    """Retry until ``memory_gib`` GiB can be allocated on a GPU, then hold it.

    Before every attempt the current time and the per-process GPU usage
    table are printed. Once the allocation succeeds the function prints a
    timestamp once per second forever; it never returns normally.

    Args:
        gpu_id: Index of the CUDA device to occupy.
        memory_gib: Amount of memory to occupy, in GiB.
        retry_seconds: Length of the countdown between failed attempts.
    """
    device = 'cuda:{}'.format(gpu_id)
    while True:
        print('\033[0;33mNow: ' + time.strftime('%Y-%m-%d %H:%M:%S') + '\033[0m')
        print_information(get_gpu_used_information())
        try:
            # With torch's default float32 dtype (4 bytes per element),
            # 1024 * 1024 * 256 elements are 1 GiB, so the leading dimension
            # is the size in GiB.
            held = torch.zeros([memory_gib, 1024, 1024, 256], device=device)
        except RuntimeError:
            # NOTE(review): RuntimeError also covers non-memory CUDA failures
            # (e.g. an invalid device index); those are retried forever too.
            for sec in reversed(range(retry_seconds)):
                print("\r\033[0;31mRace to control GPU: {} {}GiB failed, try again after {}s...\033[0m".format(
                    gpu_id, memory_gib, sec), flush=True, end='')
                time.sleep(1)
            print()
            continue
        break

    print("\033[1;35mGPU: {}, memory allocated: {} GiB\033[0m".format(gpu_id, memory_gib))
    # `held` is intentionally never used again: keeping the reference alive
    # is what keeps the GPU memory allocated while this loop runs.
    while True:
        print("\r\033[1;35mGPU: {}, hold-up time: {}\033[0m".format(
            gpu_id, time.strftime('%Y-%m-%d %H:%M:%S')), end='', flush=True)
        time.sleep(1)


if __name__ == '__main__':
    Memory_Allocated_GB = 5  # GiB of GPU memory to occupy
    GPU_ID = 2  # index of the GPU whose memory should be occupied
    hold_gpu_memory(GPU_ID, Memory_Allocated_GB)

 

上一篇:ubuntu terminal光标消失


下一篇:C语言-输出彩色字体