CPUID读取有关Cache的信息

   1: void cpuidTest()
   2: {
   3:     u32 val_eax, val_ebx, val_ecx, val_edx; 
   4:     asm("cpuid"
   5:             : "=a" (val_eax),
   6:               "=b" (val_ebx),
   7:               "=d" (val_ecx),
   8:               "=c" (val_edx)
   9:             : "a" (2));
  10:  
  11:     printk("eax: 0x%08X\n", val_eax);
  12:     printk("ebx: 0x%08X\n", val_ebx);
  13:     printk("ecx: 0x%08X\n", val_ecx);
  14:     printk("edx: 0x%08X\n", val_edx);
  15: }

读出结果如下:

   1: [190894.986103] ###################################################################
   2: [190894.986109] eax: 0x76035A01
   3: [190894.986110] ebx: 0x00F0B0FF
   4: [190894.986111] ecx: 0x00CA0000
   5: [190894.986112] edx: 0x00000000
   6: [190894.986951] ###################################################################

解析出有效的descriptor

   1: 76H: TLB Instruction TLB: 2M/4M pages, fully associative, 8 entries 
   2: 03H: TLB Data TLB: 4 KByte pages, 4-way set associative, 64 entries
   3: 5AH:TLB Data TLB0: 2-MByte or 4 MByte pages, 4-way set associative, 32 entries
   4: F0H:Prefetch 64-Byte prefetching
   5: B0H:TLB Instruction TLB: 4 KByte pages, 4-way set associative, 128 entries
   6: FFH: General CPUID leaf 2 does not report cache descriptor information, use CPUID leaf 4 to query cache parameters
   7: CAH: STLB Shared 2nd-Level TLB: 4 KByte pages, 4-way associative, 512 entries

可以看到,

General CPUID leaf 2 does not report cache descriptor information, use CPUID leaf 4 to query cache parameters

也就是说,leaf 2 返回的 descriptor 里没有 Cache 相关的信息,只有 TLB(以及预取)相关的信息。如果需要了解 Cache 的信息,需要使用 4 作为 EAX 的输入,同时用 ECX 指定要查询第几个 cache(子叶编号)。

我们重新组装代码,读取Cache相关的信息:

   1: void cpuidTest()
   2: {
   3:     u32 val_eax, val_ebx, val_ecx, val_edx; 
   4:     asm("cpuid"
   5:             : "=a" (val_eax),
   6:               "=b" (val_ebx),
   7:               "=d" (val_ecx),
   8:               "=c" (val_edx)
   9:             : "a" (4), "c"(1));
  10:  
  11:     u32 ways,partitions,line_Size, sets;
  12:  
  13:     ways = val_ebx >> 22;
  14:     partitions = (val_ebx >> 12) & 0x3FF;
  15:     line_Size = (val_ebx) & 0xFFF;
  16:     sets = val_ecx;
  17:  
  18:     printk("eax: 0x%08X\n", val_eax);
  19:     printk("ebx: 0x%08X\n", val_ebx);
  20:     printk("ecx: 0x%08X\n", val_ecx);
  21:     printk("edx: 0x%08X\n", val_edx);
  22:  
  23:     printk("ways: %d\n", ways+1);
  24:     printk("partitions: %d\n", partitions+1);
  25:     printk("line_size: %d\n", line_Size+1);
  26:     printk("sets: %d\n", sets+1);
  27:     printk("Cache L1 size: %d\n", (ways + 1)*(partitions + 1)*(line_Size + 1)*(sets + 1));
  28: }

结果如下:

   1: [193334.815202] ###################################################################
   2: [193334.815206] eax: 0x00000021
   3: [193334.815207] ebx: 0x01C0003F
   4: [193334.815208] ecx: 0x00000000
   5: [193334.815209] edx: 0x0000003F
   6: [193334.815209] ways: 8
   7: [193334.815210] partitions: 1
   8: [193334.815211] line_size: 64
   9: [193334.815211] sets: 1
  10: [193334.815212] Cache L1 size: 512
  11: [193334.815672] ###################################################################
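注意:上面这份日志是在 ecx/edx 两个输出约束写反("=d" 对应 val_ecx、"=c" 对应 val_edx)的情况下抓取的,所以日志里 ecx 和 edx 两行的值是互换的,"sets: 1" 和 "Cache L1 size: 512" 也因此是错的。真正的 ECX 是 0x0000003F,即 sets = 63 + 1 = 64,EDX 为 0。可见,这个 L1 的 Cache 并不是“全相联”,而是 8 路组相联:共有 64 个 cache set,每个 set 有 8 路,即 8 个缓存行,每个缓存行里面包含的数据是 64 bytes,总共 8 × 1 × 64 × 64 = 32768 bytes,即 32KB 的缓存。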

 

Linux是怎么读取的呢?

   1: daniel@ubuntu:/mod/pslist$ cat /proc/cpuinfo
   2: processor    : 0
   3: vendor_id    : GenuineIntel
   4: cpu family    : 6
   5: model        : 42
   6: model name    : Intel(R) Core(TM) i5-2500 CPU @ 3.30GHz
   7: stepping    : 7
   8: cpu MHz        : 3269.310
   9: cache size    : 6144 KB
  10: fdiv_bug    : no
  11: hlt_bug        : no
  12: f00f_bug    : no
  13: coma_bug    : no
  14: fpu        : yes
  15: fpu_exception    : yes
  16: cpuid level    : 5
  17: wp        : yes
  18: flags        : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx rdtscp lm constant_tsc up pni monitor ssse3 lahf_lm
  19: bogomips    : 6538.62
  20: clflush size    : 64
  21: cache_alignment    : 64
  22: address sizes    : 36 bits physical, 48 bits virtual
  23: power management:

 

   1: static int show_cpuinfo(struct seq_file *m, void *v)
   2: {
   3:     struct cpuinfo_x86 *c = v;
   4:     unsigned int cpu;
   5:     int i;
   6:  
   7: ******
   8: /* Cache size: emit the "cache size" line, in KB, unless x86_cache_size is negative */
   9: if (c->x86_cache_size >= 0)
  10:     seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
  11: ******
  12: }
   1: unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
   2: {
   3:     /* Cache sizes */
   4:     unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
   5:     unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
   6:     unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
   7:     unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
   8: #ifdef CONFIG_X86_HT
   9:     unsigned int cpu = c->cpu_index;
  10: #endif
  11:  
  12:     if (c->cpuid_level > 3) {
  13:         static int is_initialized;
  14:  
  15:         if (is_initialized == 0) {
  16:             /* Init num_cache_leaves from boot CPU */
  17:             num_cache_leaves = find_num_cache_leaves();
  18:             is_initialized++;
  19:         }
  20:  
  21:         /*
  22:          * Whenever possible use cpuid(4), deterministic cache
  23:          * parameters cpuid leaf to find the cache details
  24:          */
  25:         for (i = 0; i < num_cache_leaves; i++) {
  26:             struct _cpuid4_info_regs this_leaf;
  27:             int retval;
  28:  
  29:             retval = cpuid4_cache_lookup_regs(i, &this_leaf);
  30:             if (retval >= 0) {
  31:                 switch (this_leaf.eax.split.level) {
  32:                 case 1:
  33:                     if (this_leaf.eax.split.type ==
  34:                             CACHE_TYPE_DATA)
  35:                         new_l1d = this_leaf.size/1024;
  36:                     else if (this_leaf.eax.split.type ==
  37:                             CACHE_TYPE_INST)
  38:                         new_l1i = this_leaf.size/1024;
  39:                     break;
  40:                 case 2:
  41:                     new_l2 = this_leaf.size/1024;
  42:                     num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
  43:                     index_msb = get_count_order(num_threads_sharing);
  44:                     l2_id = c->apicid >> index_msb;
  45:                     break;
  46:                 case 3:
  47:                     new_l3 = this_leaf.size/1024;
  48:                     num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
  49:                     index_msb = get_count_order(
  50:                             num_threads_sharing);
  51:                     l3_id = c->apicid >> index_msb;
  52:                     break;
  53:                 default:
  54:                     break;
  55:                 }
  56:             }
  57:         }
  58:     }
  59:     /*
  60:      * Don‘t use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
  61:      * trace cache
  62:      */
  63:     if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
  64:         /* supports eax=2  call */
  65:         int j, n;
  66:         unsigned int regs[4];
  67:         unsigned char *dp = (unsigned char *)regs;
  68:         int only_trace = 0;
  69:  
  70:         if (num_cache_leaves != 0 && c->x86 == 15)
  71:             only_trace = 1;
  72:  
  73:         /* Number of times to iterate */
  74:         n = cpuid_eax(2) & 0xFF;
  75:  
  76:         for (i = 0 ; i < n ; i++) {
  77:             cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
  78:  
  79:             /* If bit 31 is set, this is an unknown format */
  80:             for (j = 0 ; j < 3 ; j++)
  81:                 if (regs[j] & (1 << 31))
  82:                     regs[j] = 0;
  83:  
  84:             /* Byte 0 is level count, not a descriptor */
  85:             for (j = 1 ; j < 16 ; j++) {
  86:                 unsigned char des = dp[j];
  87:                 unsigned char k = 0;
  88:  
  89:                 /* look up this descriptor in the table */
  90:                 while (cache_table[k].descriptor != 0) {
  91:                     if (cache_table[k].descriptor == des) {
  92:                         if (only_trace && cache_table[k].cache_type != LVL_TRACE)
  93:                             break;
  94:                         switch (cache_table[k].cache_type) {
  95:                         case LVL_1_INST:
  96:                             l1i += cache_table[k].size;
  97:                             break;
  98:                         case LVL_1_DATA:
  99:                             l1d += cache_table[k].size;
 100:                             break;
 101:                         case LVL_2:
 102:                             l2 += cache_table[k].size;
 103:                             break;
 104:                         case LVL_3:
 105:                             l3 += cache_table[k].size;
 106:                             break;
 107:                         case LVL_TRACE:
 108:                             trace += cache_table[k].size;
 109:                             break;
 110:                         }
 111:  
 112:                         break;
 113:                     }
 114:  
 115:                     k++;
 116:                 }
 117:             }
 118:         }
 119:     }
 120:  
 121:     if (new_l1d)
 122:         l1d = new_l1d;
 123:  
 124:     if (new_l1i)
 125:         l1i = new_l1i;
 126:  
 127:     if (new_l2) {
 128:         l2 = new_l2;
 129: #ifdef CONFIG_X86_HT
 130:         per_cpu(cpu_llc_id, cpu) = l2_id;
 131: #endif
 132:     }
 133:  
 134:     if (new_l3) {
 135:         l3 = new_l3;
 136: #ifdef CONFIG_X86_HT
 137:         per_cpu(cpu_llc_id, cpu) = l3_id;
 138: #endif
 139:     }
 140:  
 141:     c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
 142:  
 143:     return l2;
 144: }

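关键在最后这一句:

c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

也就是说,/proc/cpuinfo 里的 cache size 优先取 L3 的大小;没有 L3 时取 L2;两者都没有时才取 L1I 与 L1D 之和。上面看到的 6144 KB 对应的就是这颗 CPU 最外层 cache(L3)的大小。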

CPUID读取有关Cache的信息

上一篇:24点游戏


下一篇:初次接触SASS