git.hungrycats.org Git - linux/commitdiff
[PATCH] x86-64 update for 2.5.5
author Andi Kleen <ak@muc.de>
Tue, 26 Feb 2002 05:16:06 +0000 (21:16 -0800)
committer Linus Torvalds <torvalds@penguin.transmeta.com>
Tue, 26 Feb 2002 05:16:06 +0000 (21:16 -0800)
This patch makes x86-64 compile in 2.5.5 and syncs it with changes in the i386
port. It also fixes some bugs that were discovered in recent testing:
- enhance 32bit emulation and fix bugs.
- fix security hole in vmalloc handling
- Do not use lockless gettimeofday for now because it is buggy.
The patch only changes x86_64 specific files.

-Andi

22 files changed:
arch/x86_64/config.in
arch/x86_64/defconfig
arch/x86_64/ia32/ia32_binfmt.c
arch/x86_64/ia32/ia32_ioctl.c
arch/x86_64/ia32/ia32_signal.c
arch/x86_64/kernel/process.c
arch/x86_64/kernel/ptrace.c
arch/x86_64/kernel/signal.c
arch/x86_64/kernel/time.c
arch/x86_64/kernel/vsyscall.c
arch/x86_64/kernel/x8664_ksyms.c
arch/x86_64/mm/fault.c
arch/x86_64/mm/init.c
arch/x86_64/mm/ioremap.c
arch/x86_64/tools/offset.c
include/asm-x86_64/bitops.h
include/asm-x86_64/mmu_context.h
include/asm-x86_64/page.h
include/asm-x86_64/pda.h
include/asm-x86_64/pgalloc.h
include/asm-x86_64/pgtable.h
include/asm-x86_64/system.h

index 03a0e59287368cedf5de49f4e59eae13cd9229e0..fecd3945c0d17e7804344ec896591cb6b048747f 100644 (file)
@@ -29,7 +29,7 @@ define_int CONFIG_X86_L1_CACHE_BYTES 64
 define_int CONFIG_X86_L1_CACHE_SHIFT 6
 define_bool CONFIG_X86_TSC y
 define_bool CONFIG_X86_GOOD_APIC y
-define_bool CONFIG_X86_CMPXCHG
+define_bool CONFIG_X86_CMPXCHG y
 
 tristate '/dev/cpu/*/msr - Model-specific register support' CONFIG_X86_MSR
 tristate '/dev/cpu/*/cpuid - CPU information support' CONFIG_X86_CPUID
@@ -72,6 +72,7 @@ bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG
 
 if [ "$CONFIG_HOTPLUG" = "y" ] ; then
    source drivers/pcmcia/Config.in
+   source drivers/hotplug/Config.in
 else
    define_bool CONFIG_PCMCIA n
 fi
@@ -80,8 +81,8 @@ if [ "$CONFIG_PROC_FS" = "y" ]; then
    define_bool CONFIG_KCORE_ELF y
 fi
 # We probably are not going to support a.out, are we? Or should we support a.out in i386 compatibility mode?
-#tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
-tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
+               #tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
+               tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
 tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
 
 bool 'Power Management support' CONFIG_PM
index 61accc3f4c4f2fa7d9c1e7a807d1eebd200dcd33..b5eb95bd7f5f9e7eafe192be128ea3eaf71de40d 100644 (file)
@@ -37,6 +37,7 @@ CONFIG_X86_L1_CACHE_BYTES=64
 CONFIG_X86_L1_CACHE_SHIFT=6
 CONFIG_X86_TSC=y
 CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_CMPXCHG=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 # CONFIG_MATH_EMULATION is not set
@@ -59,16 +60,7 @@ CONFIG_BINFMT_ELF=y
 # CONFIG_BINFMT_MISC is not set
 CONFIG_PM=y
 CONFIG_IA32_EMULATION=y
-CONFIG_ACPI=y
-CONFIG_ACPI_DEBUG=y
-CONFIG_ACPI_BUSMGR=y
-CONFIG_ACPI_SYS=y
-CONFIG_ACPI_CPU=y
-CONFIG_ACPI_BUTTON=y
-CONFIG_ACPI_AC=y
-CONFIG_ACPI_EC=y
-CONFIG_ACPI_CMBATT=y
-CONFIG_ACPI_THERMAL=y
+# CONFIG_ACPI is not set
 
 #
 # Memory Technology Devices (MTD)
@@ -99,9 +91,8 @@ CONFIG_ACPI_THERMAL=y
 # CONFIG_BLK_DEV_DAC960 is not set
 # CONFIG_BLK_DEV_LOOP is not set
 # CONFIG_BLK_DEV_NBD is not set
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_INITRD=y
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_BLK_DEV_INITRD is not set
 
 #
 # Multi-device support (RAID and LVM)
@@ -388,7 +379,6 @@ CONFIG_EXT2_FS=y
 # CONFIG_UDF_RW is not set
 # CONFIG_UFS_FS is not set
 # CONFIG_UFS_FS_WRITE is not set
-CONFIG_SIMICSFS=y
 
 #
 # Network File Systems
index f9baffde5df294224624c1210f6c83938c09c568..5502fb4f6d78edd20f4a54bd7a898671ca063898 100644 (file)
@@ -12,6 +12,9 @@
 #include <asm/ptrace.h>
 #include <asm/processor.h>
 
+struct file;
+struct elf_phdr; 
+
 #define IA32_EMULATOR 1
 
 #define IA32_PAGE_OFFSET 0xE0000000
@@ -77,7 +80,6 @@ do {                                                  \
        __asm__("movl %0,%%fs": :"r" (0)); \
        __asm__("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
        wrmsrl(MSR_KERNEL_GS_BASE, 0); \
-       set_thread_flag(TIF_IA32); \
        (regs)->rip = (new_rip); \
        (regs)->rsp = (new_rsp); \
        (regs)->eflags = 0x200; \
@@ -87,6 +89,8 @@ do {                                                  \
 } while(0) 
 
 
+#define elf_map elf32_map
+
 MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); 
 MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
 
@@ -102,6 +106,7 @@ static void elf32_init(struct pt_regs *);
 
 static void elf32_init(struct pt_regs *regs)
 {
+       struct task_struct *me = current; 
        regs->rdi = 0;
        regs->rsi = 0;
        regs->rdx = 0;
@@ -109,9 +114,13 @@ static void elf32_init(struct pt_regs *regs)
        regs->rax = 0;
        regs->rbx = 0; 
        regs->rbp = 0; 
-        current->thread.fs = 0; current->thread.gs = 0;
-       current->thread.fsindex = 0; current->thread.gsindex = 0;
-        current->thread.ds = __USER_DS; current->thread.es == __USER_DS;
+    me->thread.fs = 0; 
+       me->thread.gs = 0;
+       me->thread.fsindex = 0; 
+       me->thread.gsindex = 0;
+    me->thread.ds = __USER_DS; 
+       me->thread.es = __USER_DS;
+       set_thread_flag(TIF_IA32); 
 }
 
 extern void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address);
@@ -162,4 +171,17 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm)
        
        return 0;
 }
+static unsigned long
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
+{
+       unsigned long map_addr;
+       struct task_struct *me = current; 
+
+       down_write(&me->mm->mmap_sem);
+       map_addr = do_mmap(filep, ELF_PAGESTART(addr),
+                          eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type|MAP_32BIT,
+                          eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
+       up_write(&me->mm->mmap_sem);
+       return(map_addr);
+}
 
index 5755a54c4606bfd357e0d1bcd0101c5b83c373ab..ca3b0e93e4644e9d89cb9f8b0e43de5640293e51 100644 (file)
@@ -3083,8 +3083,6 @@ COMPATIBLE_IOCTL(BLKROSET)
 COMPATIBLE_IOCTL(BLKROGET)
 COMPATIBLE_IOCTL(BLKRRPART)
 COMPATIBLE_IOCTL(BLKFLSBUF)
-COMPATIBLE_IOCTL(BLKRASET)
-COMPATIBLE_IOCTL(BLKFRASET)
 COMPATIBLE_IOCTL(BLKSECTSET)
 COMPATIBLE_IOCTL(BLKSSZGET)
 
@@ -3596,10 +3594,8 @@ HANDLE_IOCTL(SIOCDELRT, routing_ioctl)
 HANDLE_IOCTL(SIOCRTMSG, ret_einval)
 HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
 HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
-HANDLE_IOCTL(BLKRAGET, w_long)
 HANDLE_IOCTL(BLKGETSIZE, w_long)
 HANDLE_IOCTL(0x1260, broken_blkgetsize)
-HANDLE_IOCTL(BLKFRAGET, w_long)
 HANDLE_IOCTL(BLKSECTGET, w_long)
 HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans)
 HANDLE_IOCTL(FBIOGETCMAP, fb_ioctl_trans)
index 3e4fd9a321ea1d1f92241efc3246e394e547e0d3..317f67439ffca4fb06183fe14d5ec23a7a6cae80 100644 (file)
@@ -82,7 +82,7 @@ sys32_sigsuspend(int history0, int history1, old_sigset_t mask, struct pt_regs r
        spin_lock_irq(&current->sigmask_lock);
        saveset = current->blocked;
        siginitset(&current->blocked, mask);
-       recalc_sigpending(current);
+       recalc_sigpending();
        spin_unlock_irq(&current->sigmask_lock);
 
        regs.rax = -EINTR;
@@ -225,7 +225,7 @@ asmlinkage int sys32_sigreturn(struct pt_regs regs)
        sigdelsetmask(&set, ~_BLOCKABLE);
        spin_lock_irq(&current->sigmask_lock);
        current->blocked = set;
-       recalc_sigpending(current);
+       recalc_sigpending();
        spin_unlock_irq(&current->sigmask_lock);
        
        if (restore_sigcontext(&regs, &frame->sc, &eax))
@@ -252,7 +252,7 @@ asmlinkage int sys32_rt_sigreturn(struct pt_regs regs)
        sigdelsetmask(&set, ~_BLOCKABLE);
        spin_lock_irq(&current->sigmask_lock);
        current->blocked = set;
-       recalc_sigpending(current);
+       recalc_sigpending();
        spin_unlock_irq(&current->sigmask_lock);
        
        if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
index f5523724fa3be162a90af5bac8bbcbff2775861c..c2f34a774088c5035d44293a312ec19f789a0b87 100644 (file)
@@ -140,7 +140,6 @@ void cpu_idle (void)
                while (!need_resched())
                        idle();
                schedule();
-               check_pgt_cache();
        }
 }
 
index 4ef88c530b9ec1c1a800ddc35862705f0a203f26..a5e390b18f8d6906cec70ff2aba8f441f4df8bd4 100644 (file)
@@ -420,9 +420,11 @@ asmlinkage void syscall_trace(struct pt_regs *regs)
        
        current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
                                        ? 0x80 : 0);
+       preempt_disable();
        current->state = TASK_STOPPED;
        notify_parent(current, SIGCHLD);
        schedule();
+       preempt_enable();
        /*
         * this isn't the same as continuing with a signal, but it will do
         * for normal use.  strace only continues with a signal if the
index 21672259f79dde0c0652e3860d73179ccf5673df..add2016ca7e2a886c27c25e057a7d5afe6d18cba 100644 (file)
@@ -89,7 +89,7 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs regs)
        spin_lock_irq(&current->sigmask_lock);
        saveset = current->blocked;
        current->blocked = newset;
-       recalc_sigpending(current);
+       recalc_sigpending();
        spin_unlock_irq(&current->sigmask_lock);
 #if DEBUG_SIG
        printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
@@ -200,7 +200,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
        sigdelsetmask(&set, ~_BLOCKABLE);
        spin_lock_irq(&current->sigmask_lock);
        current->blocked = set;
-       recalc_sigpending(current);
+       recalc_sigpending();
        spin_unlock_irq(&current->sigmask_lock);
        
        if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
@@ -431,7 +431,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
                spin_lock_irq(&current->sigmask_lock);
                sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
                sigaddset(&current->blocked,sig);
-               recalc_sigpending(current);
+               recalc_sigpending();
                spin_unlock_irq(&current->sigmask_lock);
        }
 }
@@ -473,9 +473,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
                if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
                        /* Let the debugger run.  */
                        current->exit_code = signr;
+                       preempt_disable();
                        current->state = TASK_STOPPED;
                        notify_parent(current, SIGCHLD);
                        schedule();
+                       preempt_enable();
 
                        /* We're back.  Did the debugger cancel the sig?  */
                        if (!(signr = current->exit_code))
@@ -530,12 +532,14 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
 
                        case SIGSTOP: {
                                struct signal_struct *sig;
+                               preempt_disable(); 
                                current->state = TASK_STOPPED;
                                current->exit_code = signr;
                                sig = current->p_pptr->sig;
                                if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
                                        notify_parent(current, SIGCHLD);
                                schedule();
+                               preempt_enable();
                                continue;
                        }
 
index 39a2fe75995fd56e3990901fff89a6cd9804973e..62883a0172c68b42ae5c6d17188747314ee4fcbc 100644 (file)
@@ -120,6 +120,62 @@ spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
 
 extern spinlock_t i8259A_lock;
 
+
+static inline unsigned long do_fast_gettimeoffset(void)
+{
+       register unsigned long eax, edx;
+
+       /* Read the Time Stamp Counter */
+
+       rdtsc(eax,edx);
+
+       /* .. relative to previous jiffy (32 bits is enough) */
+       eax -= last_tsc_low;    /* tsc_low delta */
+
+       /*
+         * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
+         *             = (tsc_low delta) * (usecs_per_clock)
+         *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
+        *
+        * Using a mull instead of a divl saves up to 31 clock cycles
+        * in the critical path.
+         */
+
+       edx = (eax*fast_gettimeoffset_quotient) >> 32;
+
+       /* our adjusted time offset in microseconds */
+       return delay_at_last_interrupt + edx;
+}
+
+/*
+ * This version of gettimeofday has microsecond resolution
+ * and better than microsecond precision on fast x86 machines with TSC.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+       unsigned long flags;
+       unsigned long usec, sec;
+
+       read_lock_irqsave(&xtime_lock, flags);
+       usec = do_gettimeoffset();
+       {
+               unsigned long lost = jiffies - wall_jiffies;
+               if (lost)
+                       usec += lost * (1000000 / HZ);
+       }
+       sec = xtime.tv_sec;
+       usec += xtime.tv_usec;
+       read_unlock_irqrestore(&xtime_lock, flags);
+
+       while (usec >= 1000000) {
+               usec -= 1000000;
+               sec++;
+       }
+
+       tv->tv_sec = sec;
+       tv->tv_usec = usec;
+}
+
 void do_settimeofday(struct timeval *tv)
 {
        write_lock_irq(&xtime_lock);
@@ -484,7 +540,7 @@ void __init time_init(void)
                         * clock/second. Our precision is about 100 ppm.
                         */
                        {                       
-                               cpu_khz = ((1000000*(1UL<<32)) / tsc_quotient); /* FIXME: is it right? */
+                               cpu_khz = ((1000*(1UL<<32)) / tsc_quotient); 
                                printk("Detected %ld Hz processor.\n", cpu_khz);
                        }
                }
index 31dade42b17b8caea09f833cdccba49fee62fda9..5347df1a5bd34f31d793a5c287188668dd81abbf 100644 (file)
@@ -60,9 +60,6 @@ static inline void timeval_normalize(struct timeval * tv)
 
 long __vxtime_sequence[2] __section_vxtime_sequence;
 
-/* The rest of the kernel knows it as this. */
-extern void do_gettimeofday(struct timeval *tv) __attribute__((alias("do_vgettimeofday"))); 
-
 inline void do_vgettimeofday(struct timeval * tv)
 {
        long sequence;
index 5499e8e720384d5eb6f7916127a83ff1e30be46f..0845e8867b4e4a24561d97204e3b9a94e4a4a5c2 100644 (file)
@@ -89,7 +89,6 @@ EXPORT_SYMBOL_NOVERS(__put_user_4);
 
 EXPORT_SYMBOL(strtok);
 EXPORT_SYMBOL(strpbrk);
-EXPORT_SYMBOL(simple_strtol);
 EXPORT_SYMBOL(strstr);
 
 EXPORT_SYMBOL(strncpy_from_user);
index d0d33d0234a003d338c3ed3dbef858c38fc3efa5..3da6640084a5586c1c8fdb904ca6a8a39197fb4b 100644 (file)
@@ -112,7 +112,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
        mm = tsk->mm;
        info.si_code = SEGV_MAPERR;
 
-       if (address >= TASK_SIZE
+       if (address >= TASK_SIZE && !(error_code & 5))
                goto vmalloc_fault;
 
 
index c270db355b1160267f177a372e84558092aea8ca..3363cd71f7ddadca679043a4e8f893ab74a5e45e 100644 (file)
@@ -1,8 +1,9 @@
 /*
- *  linux/arch/i386/mm/init.c
+ *  linux/arch/x86_64/mm/init.c
  *
  *  Copyright (C) 1995  Linus Torvalds
  *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
+ *  Copyright (C) 2002  Andi Kleen <ak@suse.de>
  */
 
 #include <linux/config.h>
@@ -39,28 +40,6 @@ mmu_gather_t mmu_gathers[NR_CPUS];
 
 static unsigned long totalram_pages;
 
-int do_check_pgt_cache(int low, int high)
-{
-       int freed = 0;
-       if(read_pda(pgtable_cache_sz) > high) {
-               do {
-                       if (read_pda(pgd_quick)) {
-                               pgd_free_slow(pgd_alloc_one_fast());
-                               freed++;
-                       }
-                       if (read_pda(pmd_quick)) {
-                               pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
-                               freed++;
-                       }
-                       if (read_pda(pte_quick)) {
-                               pte_free_slow(pte_alloc_one_fast(NULL, 0));
-                               freed++;
-                       }
-               } while(read_pda(pgtable_cache_sz) > low);
-       }
-       return freed;
-}
-
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
@@ -89,7 +68,6 @@ void show_mem(void)
        printk("%d reserved pages\n",reserved);
        printk("%d pages shared\n",shared);
        printk("%d pages swap cached\n",cached);
-       printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
        show_buffers();
 }
 
@@ -138,12 +116,12 @@ static void set_pte_phys(unsigned long vaddr,
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) + 0x7));
-               if (pte != pte_offset(pmd, 0)) {
+               if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
-       pte = pte_offset(pmd, vaddr);
+       pte = pte_offset_kernel(pmd, vaddr);
        if (pte_val(*pte))
                pte_ERROR(*pte);
        set_pte(pte, mk_pte_phys(phys, prot));
index 55b8b3fcf07697e8f185d7d15abeda34d4e4fa21..27e393d4f500d38a0c95faecaa793e9a6d962602 100644 (file)
@@ -49,7 +49,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
        if (address >= end)
                BUG();
        do {
-               pte_t * pte = pte_alloc(&init_mm, pmd, address);
+               pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
                if (!pte)
                        return -ENOMEM;
                remap_area_pte(pte, address, end - address, address + phys_addr, flags);
index 88e72b0f3850b9f66ed40c0f906300a02b3849f2..b322932479090e5959857a03b1ada77fd54d2a9e 100644 (file)
@@ -42,10 +42,6 @@ int main(void)
        ENTRY(irqrsp);
        ENTRY(irqcount);
        ENTRY(irqstack); 
-       ENTRY(pgd_quick);
-       ENTRY(pmd_quick);
-       ENTRY(pte_quick);
-       ENTRY(pgtable_cache_sz);
        ENTRY(cpunumber);
        ENTRY(irqstackptr);
        ENTRY(me);
index bf2ef10e373127ec416a95b3d779f1e2e75802df..94d4ee1ec94a0804469321551e769234eb36866d 100644 (file)
@@ -413,6 +413,16 @@ static __inline__ unsigned long __ffs(unsigned long word)
 
 #ifdef __KERNEL__
 
+static inline int sched_find_first_bit(unsigned long *b)
+{
+       if (b[0])
+               return __ffs(b[0]);
+       if (b[1])
+               return __ffs(b[1]) + 64;
+       if (b[2])
+               return __ffs(b[2]) + 128;
+}
+
 /**
  * ffs - find first bit set
  * @x: the word to search
index 94bb87ae7a821cb9c9145a11d4b0bc0a22ee287c..b7b44930381e8799fe9494bf32457acebc235ac7 100644 (file)
@@ -6,33 +6,6 @@
 #include <asm/atomic.h>
 #include <asm/pgalloc.h>
 
-/*
- * Every architecture must define this function. It's the fastest
- * way of searching a 168-bit bitmap where the first 128 bits are
- * unlikely to be set. It's guaranteed that at least one of the 168
- * bits is cleared.
- */
-#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
-# error update this function.
-#endif
-
-static inline int __sched_find_first_bit(unsigned long *b)
-{
-       if (b[0])
-               return __ffs(b[0]);
-       if (b[1])
-               return __ffs(b[1]) + 64;
-       if (b[2])
-               return __ffs(b[2]) + 128;
-}
-
-static inline int sched_find_first_bit(unsigned long *b)
-{ 
-       int n = __sched_find_first_bit(b);
-       BUG_ON((unsigned)n > 167);
-       return n; 
-} 
-
 /*
  * possibly do the LDT unload here?
  */
index 48a878deddb4f6c18e18a19c55b2847456d0b1cc..7380af4af4ce7b87896c566091f50d4e8b6f45a3 100644 (file)
@@ -112,6 +112,8 @@ static unsigned long start_kernel_map __attribute__((unused)) = __START_KERNEL_m
 #define virt_to_page(kaddr)    (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
 #define VALID_PAGE(page)       ((page - mem_map) < max_mapnr)
 
+#define VM_DATA_DEFAULT_FLAGS  (VM_READ | VM_WRITE | VM_EXEC | \
+                                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #endif /* __KERNEL__ */
 
index 396dc01608ac57d5a65e40b97f6321fe14a97b20..366410f6833b3ee7f8e18b1ced6077e2ed70e056 100644 (file)
@@ -19,11 +19,6 @@ struct x8664_pda {
        struct task_struct *pcurrent;   /* Current process */
         int irqcount;              /* Irq nesting counter. Starts with -1 */   
        int cpunumber;              /* Logical CPU number */
-       /* XXX: could be a single list */
-       unsigned long *pgd_quick;
-       unsigned long *pmd_quick;
-       unsigned long *pte_quick;
-       unsigned long pgtable_cache_sz;
        char *irqstackptr;        
        unsigned int __softirq_pending;
        unsigned int __local_irq_count;
index 1d5fb0c9e51eda60920339ea07f6158e1915410f..8f26e2dfd3a647a5d0edd3c55c8d7ec53ce58d21 100644 (file)
 #include <linux/threads.h>
 #include <linux/mm.h>
 
-#define inc_pgcache_size() add_pda(pgtable_cache_sz,1UL)
-#define dec_pgcache_size() sub_pda(pgtable_cache_sz,1UL)
-
-#define pmd_populate(mm, pmd, pte) \
+#define pmd_populate_kernel(mm, pmd, pte) \
                set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
 #define pgd_populate(mm, pgd, pmd) \
                set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd)))
 
-extern __inline__ pmd_t *get_pmd_slow(void)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
 {
-       pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL);
-
-       if (ret)
-               memset(ret, 0, PAGE_SIZE);
-       return ret;
+       set_pmd(pmd, __pmd(_PAGE_TABLE | 
+                          ((u64)(pte - mem_map) << PAGE_SHIFT))); 
 }
 
-extern __inline__ pmd_t *get_pmd_fast(void)
+extern __inline__ pmd_t *get_pmd(void)
 {
-       unsigned long *ret;
-
-       preempt_disable(); 
-       ret = read_pda(pmd_quick);
-       if (ret) {
-               write_pda(pmd_quick, (unsigned long *)(*ret));
-               ret[0] = 0;
-               dec_pgcache_size();
-       }
-       preempt_enable(); 
-       if (!ret)
-               ret = (unsigned long *)get_pmd_slow();
-       return (pmd_t *)ret;
+       return (pmd_t *)get_zeroed_page(GFP_KERNEL);
 }
 
 extern __inline__ void pmd_free(pmd_t *pmd)
-{
-       preempt_disable(); 
-       *(unsigned long *)pmd = (unsigned long) read_pda(pmd_quick);
-       write_pda(pmd_quick,(unsigned long *) pmd);
-       inc_pgcache_size();
-       preempt_enable(); 
-}
-
-extern __inline__ void pmd_free_slow(pmd_t *pmd)
 {
        if ((unsigned long)pmd & (PAGE_SIZE-1)) 
                BUG(); 
        free_page((unsigned long)pmd);
 }
 
-static inline pmd_t *pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
-{
-       unsigned long *ret;
-
-       preempt_disable(); 
-       ret = (unsigned long *)read_pda(pmd_quick);
-
-       if (__builtin_expect(ret != NULL, 1)) {
-               write_pda(pmd_quick, (unsigned long *)(*ret));
-               ret[0] = 0;
-               dec_pgcache_size();
-       }
-       preempt_enable(); 
-       return (pmd_t *)ret;
-}
-
 static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
 {
-       pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
-
-       if (__builtin_expect(pmd != NULL, 1))
-               clear_page(pmd);
-       return pmd;
-}
-
-
-static inline pgd_t *pgd_alloc_one_fast (void)
-{
-       unsigned long *ret;
-
-       preempt_disable(); 
-       ret = read_pda(pgd_quick);
-       if (likely(ret != NULL)) {
-               write_pda(pgd_quick,(unsigned long *)(*ret));
-               ret[0] = 0;
-               dec_pgcache_size();
-       }
-       preempt_enable(); 
-       return (pgd_t *) ret;
+       return (pmd_t *) get_zeroed_page(GFP_KERNEL); 
 }
 
 static inline pgd_t *pgd_alloc (struct mm_struct *mm)
 {
-       /* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
-       pgd_t *pgd = pgd_alloc_one_fast();
-
-       if (pgd == NULL) {
-               pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
-               if (__builtin_expect(pgd != NULL, 1))
-                       clear_page(pgd);
-       }
-       return pgd;
+       return (pgd_t *)get_zeroed_page(GFP_KERNEL);
 }
 
 static inline void pgd_free (pgd_t *pgd)
-{
-       preempt_disable();
-       *(unsigned long *)pgd = (unsigned long) read_pda(pgd_quick);
-       write_pda(pgd_quick,(unsigned long *) pgd);
-       inc_pgcache_size();
-       preempt_enable();
-}
-
-
-static inline void pgd_free_slow (pgd_t *pgd)
 {
        if ((unsigned long)pgd & (PAGE_SIZE-1)) 
                BUG(); 
        free_page((unsigned long)pgd);
 }
 
-
-static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-       pte_t *pte;
-
-       pte = (pte_t *) __get_free_page(GFP_KERNEL);
-       if (pte)
-               clear_page(pte);
-       return pte;
+       return (pte_t *) get_zeroed_page(GFP_KERNEL);
 }
 
-extern __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-       unsigned long *ret;
-
-       preempt_disable(); 
-       if(__builtin_expect((ret = read_pda(pte_quick)) != NULL, !0)) {  
-               write_pda(pte_quick, (unsigned long *)(*ret));
-               ret[0] = ret[1];
-               dec_pgcache_size();
-       }
-       preempt_enable(); 
-       return (pte_t *)ret;
+       void *p = (void *)get_zeroed_page(GFP_KERNEL); 
+       if (!p)
+               return NULL;
+       return virt_to_page(p);
 }
 
-/* Should really implement gc for free page table pages. This could be done with 
-   a reference count in struct page. */
+/* Should really implement gc for free page table pages. This could be
+   done with a reference count in struct page. */
 
-extern __inline__ void pte_free(pte_t *pte)
-{      
-       preempt_disable();
-       *(unsigned long *)pte = (unsigned long) read_pda(pte_quick);
-       write_pda(pte_quick, (unsigned long *) pte); 
-       inc_pgcache_size();
-       preempt_enable();
-}
-
-extern __inline__ void pte_free_slow(pte_t *pte)
+extern __inline__ void pte_free_kernel(pte_t *pte)
 {
        if ((unsigned long)pte & (PAGE_SIZE-1))
                BUG();
        free_page((unsigned long)pte); 
 }
 
+extern inline void pte_free(struct page *pte)
+{
+       __free_page(pte);
+} 
 
-extern int do_check_pgt_cache(int, int);
 
 /*
  * TLB flushing:
index aa884aa3c8c1396f7afb5d7da41bb72e680cb0a1..5127ce27582b3fcac89898a7745f7c0775d2fa3b 100644 (file)
@@ -26,7 +26,7 @@ extern pgd_t level3_ident_pgt[512], swapper_pg_dir[512];
 extern pmd_t level2_kernel_pgt[512];
 extern void paging_init(void);
 
-/* Caches aren't brain-dead on the intel. */
+/* Caches aren't brain-dead. */
 #define flush_cache_all()                      do { } while (0)
 #define flush_cache_mm(mm)                     do { } while (0)
 #define flush_cache_range(vma, start, end)     do { } while (0)
@@ -35,6 +35,7 @@ extern void paging_init(void);
 #define flush_dcache_page(page)                        do { } while (0)
 #define flush_icache_range(start, end)         do { } while (0)
 #define flush_icache_page(vma,pg)              do { } while (0)
+#define flush_icache_user_range(vma,pg,adr,len)       do { } while (0)
 
 #define __flush_tlb()                                                  \
        do {                                                            \
@@ -341,8 +342,10 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 #define page_pte(page) page_pte_prot(page, __pgprot(0))
 
-#define pmd_page(pmd) \
+#define pmd_page_kernel(pmd) \
 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+#define pmd_page(pmd) \
+       (mem_map + (pmd_val(pmd) >> PAGE_SHIFT))
 
 /* to find an entry in a page-table-directory. */
 #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -360,9 +363,15 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 /* Find an entry in the third-level page table.. */
 #define __pte_offset(address) \
                ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
+#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
                        __pte_offset(address))
 
+#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
+#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
+#define pte_unmap(pte) /* NOP */
+#define pte_unmap_nested(pte) /* NOP */ 
+
+
 /* never use these in the common code */
 #define level4_page(level4) ((unsigned long) __va(level4_val(level4) & PAGE_MASK))
 #define level4_index(address) ((address >> LEVEL4_SHIFT) & (PTRS_PER_LEVEL4-1))
index d97077c0a6bcc8e5dfe5924118a061fd86a949cc..f74d3384445806c26ae6f9f0880e71e63f4f4882 100644 (file)
@@ -18,7 +18,7 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
 
 #define prepare_to_switch()    do { } while(0)
 
-#define switch_to(prev,next,last) do {                                 \
+#define switch_to(prev,next) do {                                      \
        asm volatile("pushq %%rbp\n\t"                                  \
                     "pushq %%rbx\n\t"                                  \
                     "pushq %%r8\n\t"                                   \
@@ -30,10 +30,10 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
                     "pushq %%r14\n\t"                                  \
                     "pushq %%r15\n\t"                                  \
                     "movq %%rsp,%0\n\t"        /* save RSP */          \
-                    "movq %3,%%rsp\n\t"        /* restore RSP */       \
+                    "movq %2,%%rsp\n\t"        /* restore RSP */       \
                     "leaq 1f(%%rip),%%rbp\n\t"                         \
                     "movq %%rbp,%1\n\t"        /* save RIP */          \
-                    "pushq %4\n\t"             /* setup new RIP */     \
+                    "pushq %3\n\t"             /* setup new RIP */     \
                     "jmp __switch_to\n\t"              \
                     "1:\t"             \
                     "popq %%r15\n\t"                           \
@@ -46,8 +46,7 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
                     "popq %%r8\n\t"                                    \
                     "popq %%rbx\n\t"                                   \
                     "popq %%rbp\n\t"                                   \
-                    :"=m" (prev->thread.rsp),"=m" (prev->thread.rip),  \
-                     "=b" (last)                                       \
+                    :"=m" (prev->thread.rsp),"=m" (prev->thread.rip)   \
                     :"m" (next->thread.rsp),"m" (next->thread.rip),    \
                      "b" (prev), "S" (next), "D" (prev));              \
 } while (0)