git.hungrycats.org Git - linux/commitdiff
ia64: Fix fsys_gettimeofday() and tune it some more.
author: David Mosberger <davidm@tiger.hpl.hp.com>
Fri, 28 Feb 2003 07:28:03 +0000 (23:28 -0800)
committer: David Mosberger <davidm@tiger.hpl.hp.com>
Fri, 28 Feb 2003 07:28:03 +0000 (23:28 -0800)
arch/ia64/kernel/fsys.S
arch/ia64/tools/print_offsets.c

index 18cf02e9e183df34bcee38cd4a4421f0c31e6c40..51fe29dec02889c35e8b8023b26cc63b35797c36 100644 (file)
@@ -3,11 +3,16 @@
  *
  * Copyright (C) 2003 Hewlett-Packard Co
  *     David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 18-Feb-03 louisk    Implement fsys_gettimeofday().
+ * 28-Feb-03 davidm    Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
+ *                     probably broke it along the way... ;-)
  */
 
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
 #include <asm/offsets.h>
+#include <asm/percpu.h>
 #include <asm/thread_info.h>
 
 /*
@@ -123,187 +128,171 @@ ENTRY(fsys_set_tid_address)
        br.ret.sptk.many b6
 END(fsys_set_tid_address)
 
+/*
+ * Note 1: This routine uses floating-point registers, but only with registers that
+ *        operate on integers.  Because of that, we don't need to set ar.fpsr to the
+ *        kernel default value.
+ *
+ * Note 2: For now, we will assume that all CPUs run at the same clock-frequency.
+ *        If that wasn't the case, we would have to disable preemption (e.g.,
+ *        by disabling interrupts) between reading the ITC and reading
+ *        local_cpu_data->nsec_per_cyc.
+ *
+ * Note 3: On platforms where the ITC-drift bit is set in the SAL feature vector,
+ *        we ought to either skip the ITC-based interpolation or run an ntp-like
+ *        daemon to keep the ITCs from drifting too far apart.
+ */
 ENTRY(fsys_gettimeofday)
-
        add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+       movl r3=THIS_CPU(cpu_info)
+
+       mov.m r31=ar.itc                // put time stamp into r31 (ITC) == now         (35 cyc)
+       movl r19=xtime                  // xtime is a timespec struct
        ;;
+
+#ifdef CONFIG_SMP
+       movl r10 = __per_cpu_offset
+       ;;
+       ld8 r10 = [r10]                 // r10 <- __per_cpu_offset[0]
+       movl r21 = cpu_info__per_cpu
+       ;;
+       add r10 = r21, r10              // r10 <- &cpu_data(time_keeper_id)
+#else
+       mov r10 = r3
+#endif
        ld4 r9=[r9]
-       ;;      
-       and r9=TIF_ALLWORK_MASK,r9
+       movl r17=xtime_lock
        ;;
+
        // r32, r33 should contain the 2 args of gettimeofday
-       
-       tnat.nz p6,p7=r32               // in case the args are NaT
-       cmp.ne p8, p0=0, r9
+       adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r10
+       mov r2 = -1
+       tnat.nz p6,p7=r32               // guard against NaT args
        ;;
-       
-(p7)   tnat.nz p6,p0=r33               
-(p8)   br.spnt.many fsys_fallback_syscall
+
+       adds r10=IA64_CPUINFO_ITM_DELTA_OFFSET, r10
+(p7)   tnat.nz p6,p0=r33
+(p6)   br.cond.spnt.few .fail
+
+       adds r8=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r3
+       movl r24=2361183241434822607    // for division hack (only for / 1000)
        ;;
-(p6)   adds r8=EINVAL, r0              // r8 = EINVAL
-(p6)   adds r10=-1, r0                 // r10 = -1     
-(p6)   br.ret.spnt.many b6             // return with r8 set to EINVAL
 
-       movl r17=xtime_lock
-       movl r19=xtime                  // xtime is a timespec struct
-       movl r20=cpu_info__per_cpu
+       ldf8 f7=[r10]                   // f7 now contains itm_delta
+       setf.sig f11 = r2
+       nop 0
+
+       adds r20=IA64_TIMESPEC_TV_NSEC_OFFSET, r19      // r20 = &xtime->tv_nsec
        movl r26=jiffies
-       movl r27=wall_jiffies
-       movl r31=last_nsec_offset
-       movl r24=2361183241434822607    // for division hack (only for / 1000)
-       ;;      
+
        setf.sig f9=r24                 // f9 is used for division hack
-       adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r20
-       adds r22=IA64_CPUINFO_ITM_DELTA_OFFSET, r20
-       adds r30=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r20
-       adds r3=IA64_TIMESPEC_TV_NSEC_OFFSET, r19       
-                                       // r3 = &xtime->tv_nsec
-       
-       
-while_loop_1:
-
-       // *** seq = read_seqbegin(&xtime_lock); ***
-       
-       ld4 r23=[r17]                   // since &xtime_lock == &xtime_lock->sequence
-#ifdef CONFIG_SMP
-       mf
-#endif
-       ;;                              // barrier()
-       // now r23 = seq
-       
-       ld8 r14=[r31]                   // r14 = old = last_nsec_offset         
-       
+       movl r27=wall_jiffies
+
+       and r9=TIF_ALLWORK_MASK,r9
+       movl r25=last_nsec_offset
+       ;;
+
+       ldf8 f10=[r8]                   // f10 <- local_cpu_data->nsec_per_cyc value
+       cmp.ne p8, p0=0, r9
+(p8)   br.spnt.many fsys_fallback_syscall
+       ;;
+.retry:        // *** seq = read_seqbegin(&xtime_lock); ***
+       ld4.acq r23=[r17]               // since &xtime_lock == &xtime_lock->sequence
+       ld8 r14=[r25]                   // r14 (old) = last_nsec_offset
+
        ld8 r28=[r26]                   // r28 = jiffies
        ld8 r29=[r27]                   // r29 = wall_jiffies
        ;;
-       
-       ld8 r24=[r21]                   // r24 now contains itm_next
-       ld8 r25=[r22]                   // r25 now contains itm_delta
 
-       sub r28=r28, r29                // r28 now contains "lost"
+       ldf8 f8=[r21]                   // f8 now contains itm_next
+       sub r28=r29, r28, 1             // r28 now contains "-(lost + 1)"
+       tbit.nz p9, p10=r23, 0          // p9 <- is_odd(r23), p10 <- is_even(r23)
        ;;
-       adds r28=1, r28                 // r28 now contains "lost + 1"
-       ;;
-       setf.sig f6=r28
-       setf.sig f7=r25
-       
+
        ld8 r2=[r19]                    // r2 = sec = xtime.tv_sec
-       ;;
-       
-       ld8 r28=[r3]                    // r28 = nsec = xtime.tv_nsec
-       xma.l f8=f6, f7, f0             // put lower 64-bits result of f6 * f7 in f8
-       ;;
-       getf.sig r18=f8                 // r18 now contains the (lost + 1) * itm_delta
-       ;;
-       sub r18=r24, r18                // r18 is last_tick 
-       mov r25=ar.itc                  // put time stamp into r25 (ITC) == now
-       ;;
-       cmp.leu p7, p8 = r18, r25       // if last_tick <= now, p7 = 1
-       ;;
-(p7)   ld8 r24=[r30]                   // r24 contains local_cpu_data->nsec_per_cyc value
-(p7)   sub r25=r25, r18                // elasped_cycles in r25
-       ;;
-(p7)   setf.sig f6=r24
-(p7)   setf.sig f7=r25
-       ;;
-(p7)   xma.l f8=f6, f7, f0
+       ld8 r29=[r20]                   // r29 = nsec = xtime.tv_nsec
 
+       setf.sig f6=r28                 // f6 <- -(lost + 1)                            (6 cyc)
        ;;
-(p7)   getf.sig r18=f8                 // r18 = clasped_cycles * local_cpu_data->nsec_per_cyc
-       ;;      
-(p7)   shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT
-       
-(p8)   ld8 r18=[r31]                   // r18 = last_time_offset (is unsigned long)
 
-       // now end of gettimeoffset, r18 should contain the desire result (offset)
+       mf
+       xma.l f8=f6, f7, f8     // f8 (last_tick) <- -(lost + 1)*itm_delta + itm_next   (5 cyc)
+       nop 0
 
+       setf.sig f12=r31        // f12 <- ITC                                           (6 cyc)
        // *** if (unlikely(read_seqretry(&xtime_lock, seq))) continue; ***
-
-       ;;                              // barrier()
-       
-#ifdef CONFIG_SMP
-       mf
-#endif
-       adds r24=1, r0                  // r24 = 1
-       ld4 r25=[r17]                   // r25 = xtime_lock->sequence (load again)
-       ;;
-       and r24=r24, r23                // r24 = seq & 1
-       xor r25=r25, r23                // r25 = xtime_lock->sequence ^ seq
+       ld4 r24=[r17]                   // r24 = xtime_lock->sequence (re-read)
+       nop 0
        ;;
-       or r24=r24, r25                 // now r24 = read_seqretry(&xtime_lock, seq)
-       ;;
-       cmp.ne p7, p0=r24, r0
-       ;;
-(p7)   br.spnt.many while_loop_1       // continue
-       
-       cmp.leu p7, p8 = r18, r14       // if (offset <= old)
-       ;;
-(p7)   mov r18=r14                     // offset = old
-(p7)   br.spnt.few loop_exit_1         // break
-       
-       mov ar.ccv=r18                  // ar.ccv = offset
-       ;;
-       cmpxchg8.acq r25=[r31], r14, ar.ccv     
-                                       // compare-and-exchange (atomic!)
-       ;;
-       cmp.eq p8,p0 = r25, r14 
-       ;;
-(p8)   br.sptk.many loop_exit_1
-       br.sptk.many while_loop_1
 
-loop_exit_1:
+       mov r31 = ar.itc                // re-read ITC in case we .retry                (35 cyc)
+       xma.l f8=f11, f8, f12   // f8 (elapsed_cycles) <- (-1*last_tick + now) = (now - last_tick)
+       nop 0
+       ;;
 
-       // at this point, r28 is nsec and r18 is offset
+       getf.sig r18=f8                 // r18 <- (now - last_tick)
+       xmpy.l f8=f8, f10               // f8 <- elapsed_cycles*nsec_per_cyc (5 cyc)
+       add r3=r29, r14                 // r3 = (nsec + old)
+       ;;
 
-       add r3=r28, r18                 // r3 = (nsec + offset)
+       cmp.lt p7, p8 = r18, r0         // if now < last_tick, set p7 = 1, p8 = 0
+       getf.sig r18=f8                 // r18 = elapsed_cycles*nsec_per_cyc            (6 cyc)
+       nop 0
        ;;
-       // now we try to divide r3 by 1000 to get the value in usec instead of nsec
-       
-       shr.u r24 = r3, 3
+
+(p10)  cmp.ne p9, p0=r23, r24          // if xtime_lock->sequence != seq, set p9
+       shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT  // r18 <- offset
+(p9)   br.spnt.many .retry
        ;;
-       setf.sig f7 = r24
+
+       mov ar.ccv=r14                  // ar.ccv = old                                 (1 cyc)
+       cmp.leu p7, p8=r18, r14         // if (offset <= old), set p7 = 1, p8 = 0
        ;;
-       xmpy.hu f6 = f7, f9
+
+(p8)   cmpxchg8.rel r24=[r25], r18, ar.ccv     // compare-and-exchange (atomic!)
+(p8)   add r3=r29, r18                 // r3 = (nsec + offset)
        ;;
-       getf.sig r3 = f6
+       shr.u r3 = r3, 3                // initiate dividing r3 by 1000
        ;;
-       shr.u r3 = r3, 4
-       // end of division, r3 is divided by 1000 (=usec)
-       
-       addl r24=1000000, r0            // r24 = 1000000
+       setf.sig f8 = r3                //                                              (6 cyc)
+       mov r10=1000000                 // r10 = 1000000
        ;;
-       
-while_loop_2:
-       
-       cmp.geu p7, p8=r3, r24          // while (usec >= 1000000)
+(p8)   cmp.ne.unc p9, p0 = r24, r14
+       xmpy.hu f6 = f8, f9             //                                              (5 cyc)
+(p9)   br.spnt.many .retry
        ;;
-(p8)   br.sptk.many loop_exit_2
 
-       sub r3=r3, r24                  // usec -= 1000000
-       adds r2=1, r2                   // ++sec
-       
-       br.many while_loop_2
-       
-loop_exit_2:    
-       
-       // finally,     r2 = sec
-       //              r3 = usec
-       
-       mov r24=r32                     // we need to preserve this...
+       getf.sig r3 = f6                //                                              (6 cyc)
        ;;
-       st8 [r32]=r2, 8
+       shr.u r3 = r3, 4                // end of division, r3 is divided by 1000 (=usec)
        ;;
-       st8 [r32]=r3                    // store them in the timeval struct
+
+1:     cmp.geu p7, p0=r3, r10          // while (usec >= 1000000)
        ;;
-       mov r32=r24
-       
+(p7)   sub r3=r3, r10                  // usec -= 1000000
+(p7)   adds r2=1, r2                   // ++sec
+(p7)   br.spnt.many 1b
+
+       // finally: r2 = sec, r3 = usec
+EX(.fail, st8 [r32]=r2)
+       adds r9=8, r32
        mov r8=r0                       // success
-       
+       ;;
+EX(.fail, st8 [r9]=r3)                 // store them in the timeval struct
+       mov r10=0
        MCKINLEY_E9_WORKAROUND
-       
-       br.ret.sptk.many b6
-       // return to caller
+       br.ret.sptk.many b6             // return to caller
+       /*
+        * Note: We are NOT clearing the scratch registers here.  Since the only things
+        *       in those registers are time-related variables and some addresses (which
+        *       can be obtained from System.map), none of this should be security-sensitive
+        *       and we should be fine.
+        */
 
+.fail: adds r8=EINVAL, r0              // r8 = EINVAL
+       adds r10=-1, r0                 // r10 = -1
+       MCKINLEY_E9_WORKAROUND
+       br.ret.spnt.many b6             // return with r8 set to EINVAL
 END(fsys_gettimeofday)
 
        .rodata
index 671e0c9ebb6669b421c4c7e0bd32f634e8cdabdd..5547bb73c436231c593e31d203db48d842c80889 100644 (file)
@@ -170,7 +170,7 @@ tab[] =
     /* for assembly files which can't include sched.h: */
     { "IA64_CLONE_VFORK",              CLONE_VFORK },
     { "IA64_CLONE_VM",                 CLONE_VM },
-       /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
+    /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
     { "IA64_CPUINFO_ITM_DELTA_OFFSET",                 offsetof (struct cpuinfo_ia64, itm_delta) },
     { "IA64_CPUINFO_ITM_NEXT_OFFSET",          offsetof (struct cpuinfo_ia64, itm_next) },
     { "IA64_CPUINFO_NSEC_PER_CYC_OFFSET",      offsetof (struct cpuinfo_ia64, nsec_per_cyc) },