*
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 18-Feb-03 louisk Implement fsys_gettimeofday().
+ * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
+ * probably broke it along the way... ;-)
*/
#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/offsets.h>
+#include <asm/percpu.h>
#include <asm/thread_info.h>
/*
br.ret.sptk.many b6
END(fsys_set_tid_address)
+/*
+ * fsys_gettimeofday: computes sec/usec from xtime plus an ITC-based offset and
+ * stores them as two 8-byte values at the address passed in r32. r33 is only
+ * checked for NaT in this routine. Branches to fsys_fallback_syscall when any
+ * TIF_ALLWORK_MASK bit is set in the flags word. Returns r8=0, r10=0 on
+ * success and r8=EINVAL, r10=-1 via .fail.
+ *
+ * Note 1: This routine uses floating-point registers, but only with registers that
+ * operate on integers. Because of that, we don't need to set ar.fpsr to the
+ * kernel default value.
+ *
+ * Note 2: For now, we will assume that all CPUs run at the same clock-frequency.
+ * If that weren't the case, we would have to disable preemption (e.g.,
+ * by disabling interrupts) between reading the ITC and reading
+ * local_cpu_data->nsec_per_cyc.
+ *
+ * Note 3: On platforms where the ITC-drift bit is set in the SAL feature vector,
+ * we ought to either skip the ITC-based interpolation or run an ntp-like
+ * daemon to keep the ITCs from drifting too far apart.
+ */
ENTRY(fsys_gettimeofday)
-
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+ movl r3=THIS_CPU(cpu_info)
+
+ mov.m r31=ar.itc // put time stamp into r31 (ITC) == now (35 cyc)
+ movl r19=xtime // xtime is a timespec struct
;;
+
+#ifdef CONFIG_SMP
+ movl r10 = __per_cpu_offset
+ ;;
+ ld8 r10 = [r10] // r10 <- __per_cpu_offset[0]
+ movl r21 = cpu_info__per_cpu
+ ;;
+ add r10 = r21, r10 // r10 <- &cpu_data(time_keeper_id)
+#else
+ mov r10 = r3
+#endif
ld4 r9=[r9]
- ;;
- and r9=TIF_ALLWORK_MASK,r9
+ movl r17=xtime_lock
;;
+
// r32, r33 should contain the 2 args of gettimeofday
-
- tnat.nz p6,p7=r32 // in case the args are NaT
- cmp.ne p8, p0=0, r9
+ adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r10
+ mov r2 = -1
+ tnat.nz p6,p7=r32 // guard against NaT args
;;
-
-(p7) tnat.nz p6,p0=r33
-(p8) br.spnt.many fsys_fallback_syscall
+
+ adds r10=IA64_CPUINFO_ITM_DELTA_OFFSET, r10
+(p7) tnat.nz p6,p0=r33
+(p6) br.cond.spnt.few .fail // r32 or r33 is NaT: fail with EINVAL
+
+ adds r8=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r3
+ movl r24=2361183241434822607 // ceil(2^68/125), for division hack (only for / 1000)
;;
-(p6) adds r8=EINVAL, r0 // r8 = EINVAL
-(p6) adds r10=-1, r0 // r10 = -1
-(p6) br.ret.spnt.many b6 // return with r8 set to EINVAL
- movl r17=xtime_lock
- movl r19=xtime // xtime is a timespec struct
- movl r20=cpu_info__per_cpu
+ ldf8 f7=[r10] // f7 now contains itm_delta
+ setf.sig f11 = r2 // f11 <- -1 (multiplier used below to negate last_tick)
+ nop 0
+
+ adds r20=IA64_TIMESPEC_TV_NSEC_OFFSET, r19 // r20 = &xtime->tv_nsec
movl r26=jiffies
- movl r27=wall_jiffies
- movl r31=last_nsec_offset
- movl r24=2361183241434822607 // for division hack (only for / 1000)
- ;;
+
setf.sig f9=r24 // f9 is used for division hack
- adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r20
- adds r22=IA64_CPUINFO_ITM_DELTA_OFFSET, r20
- adds r30=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r20
- adds r3=IA64_TIMESPEC_TV_NSEC_OFFSET, r19
- // r3 = &xtime->tv_nsec
-
-
-while_loop_1:
-
- // *** seq = read_seqbegin(&xtime_lock); ***
-
- ld4 r23=[r17] // since &xtime_lock == &xtime_lock->sequence
-#ifdef CONFIG_SMP
- mf
-#endif
- ;; // barrier()
- // now r23 = seq
-
- ld8 r14=[r31] // r14 = old = last_nsec_offset
-
+ movl r27=wall_jiffies
+
+ and r9=TIF_ALLWORK_MASK,r9
+ movl r25=last_nsec_offset
+ ;;
+
+ ldf8 f10=[r8] // f10 <- local_cpu_data->nsec_per_cyc value
+ cmp.ne p8, p0=0, r9
+(p8) br.spnt.many fsys_fallback_syscall // some TIF_ALLWORK_MASK bit is set in the flags word
+ ;;
+.retry: // *** seq = read_seqbegin(&xtime_lock); ***
+ ld4.acq r23=[r17] // since &xtime_lock == &xtime_lock->sequence
+ ld8 r14=[r25] // r14 (old) = last_nsec_offset
+
ld8 r28=[r26] // r28 = jiffies
ld8 r29=[r27] // r29 = wall_jiffies
;;
-
- ld8 r24=[r21] // r24 now contains itm_next
- ld8 r25=[r22] // r25 now contains itm_delta
- sub r28=r28, r29 // r28 now contains "lost"
+ ldf8 f8=[r21] // f8 now contains itm_next
+ sub r28=r29, r28, 1 // r28 now contains "-(lost + 1)"
+ tbit.nz p9, p10=r23, 0 // p9 <- is_odd(r23), p10 <- is_even(r23)
;;
- adds r28=1, r28 // r28 now contains "lost + 1"
- ;;
- setf.sig f6=r28
- setf.sig f7=r25
-
+
ld8 r2=[r19] // r2 = sec = xtime.tv_sec
- ;;
-
- ld8 r28=[r3] // r28 = nsec = xtime.tv_nsec
- xma.l f8=f6, f7, f0 // put lower 64-bits result of f6 * f7 in f8
- ;;
- getf.sig r18=f8 // r18 now contains the (lost + 1) * itm_delta
- ;;
- sub r18=r24, r18 // r18 is last_tick
- mov r25=ar.itc // put time stamp into r25 (ITC) == now
- ;;
- cmp.leu p7, p8 = r18, r25 // if last_tick <= now, p7 = 1
- ;;
-(p7) ld8 r24=[r30] // r24 contains local_cpu_data->nsec_per_cyc value
-(p7) sub r25=r25, r18 // elasped_cycles in r25
- ;;
-(p7) setf.sig f6=r24
-(p7) setf.sig f7=r25
- ;;
-(p7) xma.l f8=f6, f7, f0
+ ld8 r29=[r20] // r29 = nsec = xtime.tv_nsec
+ setf.sig f6=r28 // f6 <- -(lost + 1) (6 cyc)
;;
-(p7) getf.sig r18=f8 // r18 = clasped_cycles * local_cpu_data->nsec_per_cyc
- ;;
-(p7) shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT
-
-(p8) ld8 r18=[r31] // r18 = last_time_offset (is unsigned long)
- // now end of gettimeoffset, r18 should contain the desire result (offset)
+ mf // memory fence: orders the loads above against the sequence re-read below
+ xma.l f8=f6, f7, f8 // f8 (last_tick) <- -(lost + 1)*itm_delta + itm_next (5 cyc)
+ nop 0
+ setf.sig f12=r31 // f12 <- ITC (6 cyc)
// *** if (unlikely(read_seqretry(&xtime_lock, seq))) continue; ***
-
- ;; // barrier()
-
-#ifdef CONFIG_SMP
- mf
-#endif
- adds r24=1, r0 // r24 = 1
- ld4 r25=[r17] // r25 = xtime_lock->sequence (load again)
- ;;
- and r24=r24, r23 // r24 = seq & 1
- xor r25=r25, r23 // r25 = xtime_lock->sequence ^ seq
+ ld4 r24=[r17] // r24 = xtime_lock->sequence (re-read)
+ nop 0
;;
- or r24=r24, r25 // now r24 = read_seqretry(&xtime_lock, seq)
- ;;
- cmp.ne p7, p0=r24, r0
- ;;
-(p7) br.spnt.many while_loop_1 // continue
-
- cmp.leu p7, p8 = r18, r14 // if (offset <= old)
- ;;
-(p7) mov r18=r14 // offset = old
-(p7) br.spnt.few loop_exit_1 // break
-
- mov ar.ccv=r18 // ar.ccv = offset
- ;;
- cmpxchg8.acq r25=[r31], r14, ar.ccv
- // compare-and-exchange (atomic!)
- ;;
- cmp.eq p8,p0 = r25, r14
- ;;
-(p8) br.sptk.many loop_exit_1
- br.sptk.many while_loop_1
-loop_exit_1:
+ mov r31 = ar.itc // re-read ITC in case we .retry (35 cyc)
+ xma.l f8=f11, f8, f12 // f8 (elapsed_cycles) <- (-1*last_tick + now) = (now - last_tick)
+ nop 0
+ ;;
- // at this point, r28 is nsec and r18 is offset
+ getf.sig r18=f8 // r18 <- (now - last_tick)
+ xmpy.l f8=f8, f10 // f8 <- elapsed_cycles*nsec_per_cyc (5 cyc)
+ add r3=r29, r14 // r3 = (nsec + old)
+ ;;
- add r3=r28, r18 // r3 = (nsec + offset)
+ cmp.lt p7, p8 = r18, r0 // if now < last_tick, set p7 = 1, p8 = 0 (NOTE(review): p7/p8 are overwritten by the cmp.leu below before any visible use -- confirm intent)
+ getf.sig r18=f8 // r18 = elapsed_cycles*nsec_per_cyc (6 cyc)
+ nop 0
;;
- // now we try to divide r3 by 1000 to get the value in usec instead of nsec
-
- shr.u r24 = r3, 3
+
+(p10) cmp.ne p9, p0=r23, r24 // if xtime_lock->sequence != seq, set p9
+ shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT // r18 <- offset
+(p9) br.spnt.many .retry // seq was odd or changed during the reads: retry
;;
- setf.sig f7 = r24
+
+ mov ar.ccv=r14 // ar.ccv = old (1 cyc)
+ cmp.leu p7, p8=r18, r14 // if (offset <= old), set p7 = 1, p8 = 0
;;
- xmpy.hu f6 = f7, f9
+
+(p8) cmpxchg8.rel r24=[r25], r18, ar.ccv // compare-and-exchange (atomic!)
+(p8) add r3=r29, r18 // r3 = (nsec + offset)
;;
- getf.sig r3 = f6
+ shr.u r3 = r3, 3 // initiate dividing r3 by 1000
;;
- shr.u r3 = r3, 4
- // end of division, r3 is divided by 1000 (=usec)
-
- addl r24=1000000, r0 // r24 = 1000000
+ setf.sig f8 = r3 // (6 cyc)
+ mov r10=1000000 // r10 = 1000000
;;
-
-while_loop_2:
-
- cmp.geu p7, p8=r3, r24 // while (usec >= 1000000)
+(p8) cmp.ne.unc p9, p0 = r24, r14 // cmpxchg8 returned a value != old: last_nsec_offset changed under us
+ xmpy.hu f6 = f8, f9 // (5 cyc)
+(p9) br.spnt.many .retry // ... so start over
;;
-(p8) br.sptk.many loop_exit_2
- sub r3=r3, r24 // usec -= 1000000
- adds r2=1, r2 // ++sec
-
- br.many while_loop_2
-
-loop_exit_2:
-
- // finally, r2 = sec
- // r3 = usec
-
- mov r24=r32 // we need to preserve this...
+ getf.sig r3 = f6 // (6 cyc)
;;
- st8 [r32]=r2, 8
+ shr.u r3 = r3, 4 // end of division, r3 is divided by 1000 (=usec)
;;
- st8 [r32]=r3 // store them in the timeval struct
+
+1: cmp.geu p7, p0=r3, r10 // while (usec >= 1000000)
;;
- mov r32=r24
-
+(p7) sub r3=r3, r10 // usec -= 1000000
+(p7) adds r2=1, r2 // ++sec
+(p7) br.spnt.many 1b
+
+ // finally: r2 = sec, r3 = usec
+EX(.fail, st8 [r32]=r2) // store sec at [r32]
+ adds r9=8, r32 // r9 = address of the second 8-byte field (usec)
mov r8=r0 // success
-
+ ;;
+EX(.fail, st8 [r9]=r3) // store them in the timeval struct
+ mov r10=0 // r10 = 0 on the success path (.fail sets -1)
MCKINLEY_E9_WORKAROUND
-
- br.ret.sptk.many b6
- // return to caller
+ br.ret.sptk.many b6 // return to caller
+ /*
+ * Note: We are NOT clearing the scratch registers here. Since the only things
+ * in those registers are time-related variables and some addresses (which
+ * can be obtained from System.map), none of this should be security-sensitive
+ * and we should be fine.
+ */
+.fail: adds r8=EINVAL, r0 // r8 = EINVAL (NOTE(review): also the EX() target of the stores above, so a faulting store presumably reports EINVAL too -- confirm whether EFAULT is intended)
+ adds r10=-1, r0 // r10 = -1
+ MCKINLEY_E9_WORKAROUND
+ br.ret.spnt.many b6 // return with r8 set to EINVAL
END(fsys_gettimeofday)
.rodata