git.hungrycats.org Git - linux/commitdiff
[PATCH] OProfile: reduce allocations of MSR structs
author: John Levon <levon@movementarian.org>
Thu, 21 Aug 2003 09:57:03 +0000 (02:57 -0700)
committer: Linus Torvalds <torvalds@home.osdl.org>
Thu, 21 Aug 2003 09:57:03 +0000 (02:57 -0700)
Andi Kleen pointed out the MSRs array was a massive bloat source. Reduce
it somewhat by only allocating the amount actually needed for the CPU type.

Untested on Pentium IV - I don't have a machine.

arch/i386/oprofile/nmi_int.c
arch/i386/oprofile/op_model_athlon.c
arch/i386/oprofile/op_model_p4.c
arch/i386/oprofile/op_model_ppro.c
arch/i386/oprofile/op_x86_model.h

index 18655f44b88c0db0771245be7c299828796469ad..0d34826889dd3c4cab2008ac821f2e139d3022f9 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/smp.h>
 #include <linux/oprofile.h>
 #include <linux/sysdev.h>
+#include <linux/slab.h>
 #include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
@@ -91,24 +92,66 @@ static void nmi_save_registers(struct op_msrs * msrs)
 {
        unsigned int const nr_ctrs = model->num_counters;
        unsigned int const nr_ctrls = model->num_controls; 
-       struct op_msr_group * counters = &msrs->counters;
-       struct op_msr_group * controls = &msrs->controls;
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
        unsigned int i;
 
        for (i = 0; i < nr_ctrs; ++i) {
-               rdmsr(counters->addrs[i],
-                       counters->saved[i].low,
-                       counters->saved[i].high);
+               rdmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
        }
  
        for (i = 0; i < nr_ctrls; ++i) {
-               rdmsr(controls->addrs[i],
-                       controls->saved[i].low,
-                       controls->saved[i].high);
+               rdmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
        }
 }
 
+
+static void free_msrs(void)
+{
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               kfree(cpu_msrs[i].counters);
+               cpu_msrs[i].counters = NULL;
+               kfree(cpu_msrs[i].controls);
+               cpu_msrs[i].controls = NULL;
+       }
+}
+
+
+static int allocate_msrs(void)
+{
+       int success = 1;
+       size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+       size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               if (!cpu_online(i))
+                       continue;
+
+               cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL);
+               if (!cpu_msrs[i].counters) {
+                       success = 0;
+                       break;
+               }
+               cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL);
+               if (!cpu_msrs[i].controls) {
+                       success = 0;
+                       break;
+               }
+       }
+
+       if (!success)
+               free_msrs();
+
+       return success;
+}
+
+
 static void nmi_cpu_setup(void * dummy)
 {
        int cpu = smp_processor_id();
@@ -125,6 +168,9 @@ static void nmi_cpu_setup(void * dummy)
 
 static int nmi_setup(void)
 {
+       if (!allocate_msrs())
+               return -ENOMEM;
+
        /* We walk a thin line between law and rape here.
         * We need to be careful to install our NMI handler
         * without actually triggering any NMIs as this will
@@ -142,20 +188,20 @@ static void nmi_restore_registers(struct op_msrs * msrs)
 {
        unsigned int const nr_ctrs = model->num_counters;
        unsigned int const nr_ctrls = model->num_controls; 
-       struct op_msr_group * counters = &msrs->counters;
-       struct op_msr_group * controls = &msrs->controls;
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
        unsigned int i;
 
        for (i = 0; i < nr_ctrls; ++i) {
-               wrmsr(controls->addrs[i],
-                       controls->saved[i].low,
-                       controls->saved[i].high);
+               wrmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
        }
  
        for (i = 0; i < nr_ctrs; ++i) {
-               wrmsr(counters->addrs[i],
-                       counters->saved[i].low,
-                       counters->saved[i].high);
+               wrmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
        }
 }
  
@@ -185,6 +231,7 @@ static void nmi_shutdown(void)
        on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
        unset_nmi_callback();
        enable_lapic_nmi_watchdog();
+       free_msrs();
 }
 
  
index 4f55d1da6f60089998540414c62cbe43bf59d71c..5f6cc84abfa763651d3814c07c6e671622c210e0 100644 (file)
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
 
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(unsigned int)(l), -1);} while (0)
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 
-#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls.addrs[(c)], (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls.addrs[(c)], (l), (h));} while (0)
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
 #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
 #define CTRL_CLEAR(x) (x &= (1<<21))
@@ -39,15 +39,15 @@ static unsigned long reset_value[NUM_COUNTERS];
  
 static void athlon_fill_in_addresses(struct op_msrs * const msrs)
 {
-       msrs->counters.addrs[0] = MSR_K7_PERFCTR0;
-       msrs->counters.addrs[1] = MSR_K7_PERFCTR1;
-       msrs->counters.addrs[2] = MSR_K7_PERFCTR2;
-       msrs->counters.addrs[3] = MSR_K7_PERFCTR3;
-
-       msrs->controls.addrs[0] = MSR_K7_EVNTSEL0;
-       msrs->controls.addrs[1] = MSR_K7_EVNTSEL1;
-       msrs->controls.addrs[2] = MSR_K7_EVNTSEL2;
-       msrs->controls.addrs[3] = MSR_K7_EVNTSEL3;
+       msrs->counters[0].addr = MSR_K7_PERFCTR0;
+       msrs->counters[1].addr = MSR_K7_PERFCTR1;
+       msrs->counters[2].addr = MSR_K7_PERFCTR2;
+       msrs->counters[3].addr = MSR_K7_PERFCTR3;
+
+       msrs->controls[0].addr = MSR_K7_EVNTSEL0;
+       msrs->controls[1].addr = MSR_K7_EVNTSEL1;
+       msrs->controls[2].addr = MSR_K7_EVNTSEL2;
+       msrs->controls[3].addr = MSR_K7_EVNTSEL3;
 }
 
  
index 1b81fc4299cd38db3b2a25930ae5f2c24dbe7a55..7e41dc9826a1fb0b41101a539f4440d3d7c6523e 100644 (file)
@@ -366,8 +366,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr (p4_counters[(i)].cccr_address, (low), (high));} while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr (p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
@@ -410,7 +410,7 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
 
        /* the counter registers we pay attention to */
        for (i = 0; i < num_counters; ++i) {
-               msrs->counters.addrs[i] = 
+               msrs->counters[i].addr = 
                        p4_counters[VIRT_CTR(stag, i)].counter_address;
        }
 
@@ -419,42 +419,42 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
        /* 18 CCCR registers */
        for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
             addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
-               msrs->controls.addrs[i] = addr;
+               msrs->controls[i].addr = addr;
        }
        
        /* 43 ESCR registers in three discontiguous group */
        for (addr = MSR_P4_BSU_ESCR0 + stag;
             addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { 
-               msrs->controls.addrs[i] = addr;
+               msrs->controls[i].addr = addr;
        }
        
        for (addr = MSR_P4_MS_ESCR0 + stag;
             addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
-               msrs->controls.addrs[i] = addr;
+               msrs->controls[i].addr = addr;
        }
        
        for (addr = MSR_P4_IX_ESCR0 + stag;
             addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
-               msrs->controls.addrs[i] = addr;
+               msrs->controls[i].addr = addr;
        }
 
        /* there are 2 remaining non-contiguously located ESCRs */
 
        if (num_counters == NUM_COUNTERS_NON_HT) {              
                /* standard non-HT CPUs handle both remaining ESCRs*/
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 
        } else if (stag == 0) {
                /* HT CPUs give the first remainder to the even thread, as
                   the 32nd control register */
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 
        } else {
                /* and two copies of the second to the odd thread,
                   for the 22st and 23nd control registers */
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
-               msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
        }
 }
 
index d9f38d6853955415137bf594196f55467d6aeb6a..68aed25442a5d33497f41c1860fc2660cc8ed571 100644 (file)
 #define NUM_COUNTERS 2
 #define NUM_CONTROLS 2
 
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(u32)(l), -1);} while (0)
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 
-#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0)
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
 #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
 #define CTRL_CLEAR(x) (x &= (1<<21))
@@ -39,11 +39,11 @@ static unsigned long reset_value[NUM_COUNTERS];
  
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
-       msrs->counters.addrs[0] = MSR_P6_PERFCTR0;
-       msrs->counters.addrs[1] = MSR_P6_PERFCTR1;
+       msrs->counters[0].addr = MSR_P6_PERFCTR0;
+       msrs->counters[1].addr = MSR_P6_PERFCTR1;
        
-       msrs->controls.addrs[0] = MSR_P6_EVNTSEL0;
-       msrs->controls.addrs[1] = MSR_P6_EVNTSEL1;
+       msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+       msrs->controls[1].addr = MSR_P6_EVNTSEL1;
 }
 
 
index e0da54abf7b86b897890b8f6991c4299676b7508..5cc2514670f3fecedf969297aab1097203cd238f 100644 (file)
 #ifndef OP_X86_MODEL_H
 #define OP_X86_MODEL_H
 
-/* Pentium IV needs all these */
-#define MAX_MSR 63
 struct op_saved_msr {
        unsigned int high;
        unsigned int low;
 };
 
-struct op_msr_group {
-       unsigned int addrs[MAX_MSR];
-       struct op_saved_msr saved[MAX_MSR];
+struct op_msr {
+       unsigned long addr;
+       struct op_saved_msr saved;
 };
 
 struct op_msrs {
-       struct op_msr_group counters;
-       struct op_msr_group controls;
+       struct op_msr * counters;
+       struct op_msr * controls;
 };
 
 struct pt_regs;