.globl U3copy_from_user
U3copy_from_user: /* %o0=dst, %o1=src, %o2=len */
cmp %o2, 0
- be,pn %XCC, out
+ be,pn %XCC, 85f
or %o0, %o1, %o3
cmp %o2, 16
- bleu,a,pn %XCC, small_copy
+ bleu,a,pn %XCC, 80f
or %o3, %o2, %o3
cmp %o2, 256
- blu,pt %XCC, medium_copy
+ blu,pt %XCC, 70f
andcc %o3, 0x7, %g0
- ba,pt %xcc, enter
+ ba,pt %xcc, 1f
andcc %o0, 0x3f, %g2
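/* Branch targets below use the assembler's local numeric labels:
 * "Nf"/"Nb" refer to the nearest "N:" forward/backward, and the
 * larger numbers (70, 80, 85, 90) mark the shorter-copy and exit
 * paths near the end of this routine.
 *
 * %o3 accumulates dst|src above (and, via the annulled delay slot of
 * the small-copy branch, also len), so a single andcc can test the
 * alignment of everything at once before picking a copy strategy.
 */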
/* Here len >= 256 and condition codes reflect execution
* of "andcc %o0, 0x7, %g2", done by caller.
*/
.align 64
-enter:
+1:
/* Is 'dst' already aligned on a 64-byte boundary? */
be,pt %XCC, 2f
2: VISEntryHalf
and %o1, 0x7, %g1
- ba,pt %xcc, begin
+ ba,pt %xcc, 1f
alignaddr %o1, %g0, %o1
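/* A rough sketch of the VIS alignment scheme used from here on:
 * alignaddr adds its address operands (src + 0), records the low
 * three bits of the sum in the GSR alignment field, and leaves the
 * 8-byte-aligned address in %o1; each later faligndata concatenates
 * two 8-byte FP registers and extracts the 8 bytes starting at that
 * recorded byte offset.  The offset itself was saved in %g1 above so
 * it can be added back to %o1 once the FP copy loop is done.
 */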
.align 64
-begin:
+1:
membar #StoreLoad | #StoreStore | #LoadStore
prefetcha [%o1 + 0x000] %asi, #one_read
prefetcha [%o1 + 0x040] %asi, #one_read
sub %o4, 0x80, %o4
add %o1, 0x40, %o1
- ba,pt %xcc, loop
+ ba,pt %xcc, 1f
srl %o4, 6, %o3
.align 64
-loop:
+1:
EX3(ldda [%o1 + 0x008] %asi, %f2)
faligndata %f12, %f14, %f28
EX3(ldda [%o1 + 0x010] %asi, %f4)
faligndata %f10, %f12, %f26
subcc %o3, 0x01, %o3
add %o1, 0x40, %o1
- bg,pt %XCC, loop
+ bg,pt %XCC, 1b
add %o0, 0x40, %o0
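/* Loop bookkeeping, as far as it is visible here: %o4 (set up in
 * code not shown) is reduced by 0x80, apparently reserving one
 * 64-byte block for the startup loads and one for the final block
 * copied after the loop, and "srl %o4, 6, %o3" turns the remainder
 * into a 64-byte block count.  Each pass advances %o1 and %o0 by
 * 0x40 and decrements %o3 until it reaches zero.
 */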
/* Finally we copy the last full 64-byte block. */
-loopfini:
EX3(ldda [%o1 + 0x008] %asi, %f2)
faligndata %f12, %f14, %f28
EX3(ldda [%o1 + 0x010] %asi, %f4)
* Also notice how this code is careful not to perform a
* load past the end of the src buffer.
*/
-loopend:
and %o2, 0x3f, %o2
andcc %o2, 0x38, %g2
- be,pn %XCC, endcruft
+ be,pn %XCC, 10f
subcc %g2, 0x8, %g2
- be,pn %XCC, endcruft
+ be,pn %XCC, 10f
cmp %g1, 0
be,a,pt %XCC, 1f
subcc %g2, 0x8, %g2
faligndata %f0, %f2, %f8
std %f8, [%o0 + 0x00]
- be,pn %XCC, endcruft
+ be,pn %XCC, 10f
add %o0, 0x8, %o0
EX(ldda [%o1 + 0x08] %asi, %f0, add %o2, %g0)
add %o1, 0x8, %o1
* Note that %g1 is (src & 0x7) saved above before the
* alignaddr was performed.
*/
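/* Tail handling, roughly: "add %o1, %g1, %o1" below undoes the
 * rounding that alignaddr applied to src, and "sub %o0, %o1, %o3"
 * forms (dst - src), so that stores of the form [%o1 + %o3] land on
 * the destination while %o1 alone is advanced along the source.
 */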
-endcruft:
+10:
cmp %o2, 0
add %o1, %g1, %o1
VISExitHalf
- be,pn %XCC, out
+ be,pn %XCC, 85f
sub %o0, %o1, %o3
andcc %g1, 0x7, %g0
- bne,pn %icc, small_copy_unaligned
+ bne,pn %icc, 90f
andcc %o2, 0x8, %g0
be,pt %icc, 1f
nop
add %o1, 0x2, %o1
1: andcc %o2, 0x1, %g0
- be,pt %icc, out
+ be,pt %icc, 85f
nop
EXNV(lduba [%o1] %asi, %o5, and %o2, 0x1)
- ba,pt %xcc, out
+ ba,pt %xcc, 85f
stb %o5, [%o1 + %o3]
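/* The EX/EX3/EXNV wrappers around the %asi loads above presumably
 * expand to the access plus an exception-table fixup; their last
 * operand looks like the expression used to compute how many bytes
 * were left uncopied, which a fault handler would return instead of
 * the 0 produced by "clr %o0" on success.
 */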
-medium_copy: /* 16 < len <= 64 */
- bne,pn %XCC, small_copy_unaligned
+70: /* 16 < len <= 64 */
+ bne,pn %XCC, 90f
sub %o0, %o1, %o3
-medium_copy_aligned:
andn %o2, 0x7, %o4
and %o2, 0x7, %o2
1: subcc %o4, 0x8, %o4
stw %o5, [%o1 + %o3]
add %o1, 0x4, %o1
1: cmp %o2, 0
- be,pt %XCC, out
+ be,pt %XCC, 85f
nop
- ba,pt %xcc, small_copy_unaligned
+ ba,pt %xcc, 90f
nop
-small_copy: /* 0 < len <= 16 */
+80: /* 0 < len <= 16 */
andcc %o3, 0x3, %g0
- bne,pn %XCC, small_copy_unaligned
+ bne,pn %XCC, 90f
sub %o0, %o1, %o3
-small_copy_aligned:
+1:
subcc %o2, 4, %o2
EXNV(lduwa [%o1] %asi, %g1, add %o2, %g0)
stw %g1, [%o1 + %o3]
- bgu,pt %XCC, small_copy_aligned
+ bgu,pt %XCC, 1b
add %o1, 4, %o1
-out: retl
+85: retl
clr %o0
.align 32
-small_copy_unaligned:
+90:
subcc %o2, 1, %o2
EXNV(lduba [%o1] %asi, %g1, add %o2, %g0)
stb %g1, [%o1 + %o3]
- bgu,pt %XCC, small_copy_unaligned
+ bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
clr %o0
nop
cmp %o2, 0
- be,pn %XCC, out
+ be,pn %XCC, 85f
or %o0, %o1, %o3
cmp %o2, 16
- bleu,a,pn %XCC, small_copy
+ bleu,a,pn %XCC, 80f
or %o3, %o2, %o3
cmp %o2, 256
- blu,pt %XCC, medium_copy
+ blu,pt %XCC, 70f
andcc %o3, 0x7, %g0
- ba,pt %xcc, enter
+ ba,pt %xcc, 1f
andcc %o0, 0x3f, %g2
/* Here len >= 256 and condition codes reflect execution
* of "andcc %o0, 0x7, %g2", done by caller.
*/
.align 64
-enter:
+1:
/* Is 'dst' already aligned on a 64-byte boundary? */
be,pt %XCC, 2f
2: VISEntryHalf
and %o1, 0x7, %g1
- ba,pt %xcc, begin
+ ba,pt %xcc, 1f
alignaddr %o1, %g0, %o1
.align 64
-begin:
+1:
membar #StoreLoad | #StoreStore | #LoadStore
prefetch [%o1 + 0x000], #one_read
prefetch [%o1 + 0x040], #one_read
sub %o4, 0x80, %o4
add %o1, 0x40, %o1
- ba,pt %xcc, loop
+ ba,pt %xcc, 1f
srl %o4, 6, %o3
.align 64
-loop:
+1:
ldd [%o1 + 0x008], %f2
faligndata %f12, %f14, %f28
ldd [%o1 + 0x010], %f4
faligndata %f10, %f12, %f26
subcc %o3, 0x01, %o3
add %o1, 0x40, %o1
- bg,pt %XCC, loop
+ bg,pt %XCC, 1b
add %o0, 0x40, %o0
/* Finally we copy the last full 64-byte block. */
-loopfini:
ldd [%o1 + 0x008], %f2
faligndata %f12, %f14, %f28
ldd [%o1 + 0x010], %f4
* Also notice how this code is careful not to perform a
* load past the end of the src buffer.
*/
-loopend:
and %o2, 0x3f, %o2
andcc %o2, 0x38, %g2
- be,pn %XCC, endcruft
+ be,pn %XCC, 2f
subcc %g2, 0x8, %g2
- be,pn %XCC, endcruft
+ be,pn %XCC, 2f
cmp %g1, 0
be,a,pt %XCC, 1f
subcc %g2, 0x8, %g2
faligndata %f0, %f2, %f8
EX(stda %f8, [%o0 + 0x00] %asi, add %o2, 0x8)
- be,pn %XCC, endcruft
+ be,pn %XCC, 2f
add %o0, 0x8, %o0
ldd [%o1 + 0x08], %f0
add %o1, 0x8, %o1
* Note that %g1 is (src & 0x7) saved above before the
* alignaddr was performed.
*/
-endcruft:
+2:
cmp %o2, 0
add %o1, %g1, %o1
VISExitHalf
- be,pn %XCC, out
+ be,pn %XCC, 85f
sub %o0, %o1, %o3
andcc %g1, 0x7, %g0
- bne,pn %icc, small_copy_unaligned
+ bne,pn %icc, 90f
andcc %o2, 0x8, %g0
be,pt %icc, 1f
nop
add %o1, 0x2, %o1
1: andcc %o2, 0x1, %g0
- be,pt %icc, out
+ be,pt %icc, 85f
nop
ldub [%o1], %o5
- ba,pt %xcc, out
+ ba,pt %xcc, 85f
EXNV(stba %o5, [%o1 + %o3] ASI_AIUS, and %o2, 0x1)
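/* In this routine the faulting side appears to be the destination:
 * the stores go out through ASI_AIUS / %asi and carry the EX*()
 * wrappers, while the ldd/ldub loads from kernel memory are plain.
 */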
-medium_copy: /* 16 < len <= 64 */
- bne,pn %XCC, small_copy_unaligned
+70: /* 16 < len <= 64 */
+ bne,pn %XCC, 90f
sub %o0, %o1, %o3
-medium_copy_aligned:
andn %o2, 0x7, %o4
and %o2, 0x7, %o2
1: subcc %o4, 0x8, %o4
EXNV3(stwa %o5, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
add %o1, 0x4, %o1
1: cmp %o2, 0
- be,pt %XCC, out
+ be,pt %XCC, 85f
nop
- ba,pt %xcc, small_copy_unaligned
+ ba,pt %xcc, 90f
nop
-small_copy: /* 0 < len <= 16 */
+80: /* 0 < len <= 16 */
andcc %o3, 0x3, %g0
- bne,pn %XCC, small_copy_unaligned
+ bne,pn %XCC, 90f
sub %o0, %o1, %o3
-small_copy_aligned:
+1:
subcc %o2, 4, %o2
lduw [%o1], %g1
EXNV3(stwa %g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
- bgu,pt %XCC, small_copy_aligned
+ bgu,pt %XCC, 1b
add %o1, 4, %o1
-out: retl
+85: retl
clr %o0
.align 32
-small_copy_unaligned:
+90:
subcc %o2, 1, %o2
ldub [%o1], %g1
EXNV2(stba %g1, [%o1 + %o3] ASI_AIUS, add %o2, %g0)
- bgu,pt %XCC, small_copy_unaligned
+ bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
clr %o0
U3memcpy: /* %o0=dst, %o1=src, %o2=len */
mov %o0, %g5
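/* The original dst is parked in %g5 so that both "retl" exits can
 * restore it as memcpy's return value via "mov %g5, %o0".
 */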
cmp %o2, 0
- be,pn %XCC, out
+ be,pn %XCC, 85f
or %o0, %o1, %o3
cmp %o2, 16
- bleu,a,pn %XCC, small_copy
+ bleu,a,pn %XCC, 80f
or %o3, %o2, %o3
cmp %o2, 256
- blu,pt %XCC, medium_copy
+ blu,pt %XCC, 70f
andcc %o3, 0x7, %g0
- ba,pt %xcc, enter
+ ba,pt %xcc, 1f
andcc %o0, 0x3f, %g2
/* Here len >= 256 and condition codes reflect execution
* of "andcc %o0, 0x7, %g2", done by caller.
*/
.align 64
-enter:
+1:
/* Is 'dst' already aligned on a 64-byte boundary? */
be,pt %XCC, 2f
2: VISEntryHalf
and %o1, 0x7, %g1
- ba,pt %xcc, begin
+ ba,pt %xcc, 1f
alignaddr %o1, %g0, %o1
.align 64
-begin:
+1:
membar #StoreLoad | #StoreStore | #LoadStore
prefetch [%o1 + 0x000], #one_read
prefetch [%o1 + 0x040], #one_read
sub %o4, 0x80, %o4
add %o1, 0x40, %o1
- ba,pt %xcc, loop
+ ba,pt %xcc, 1f
srl %o4, 6, %o3
.align 64
-loop:
+1:
ldd [%o1 + 0x008], %f2
faligndata %f12, %f14, %f28
ldd [%o1 + 0x010], %f4
faligndata %f10, %f12, %f26
subcc %o3, 0x01, %o3
add %o1, 0x40, %o1
- bg,pt %XCC, loop
+ bg,pt %XCC, 1b
add %o0, 0x40, %o0
/* Finally we copy the last full 64-byte block. */
-loopfini:
ldd [%o1 + 0x008], %f2
faligndata %f12, %f14, %f28
ldd [%o1 + 0x010], %f4
* Also notice how this code is careful not to perform a
* load past the end of the src buffer.
*/
-loopend:
and %o2, 0x3f, %o2
andcc %o2, 0x38, %g2
- be,pn %XCC, endcruft
+ be,pn %XCC, 2f
subcc %g2, 0x8, %g2
- be,pn %XCC, endcruft
+ be,pn %XCC, 2f
cmp %g1, 0
be,a,pt %XCC, 1f
subcc %g2, 0x8, %g2
faligndata %f0, %f2, %f8
std %f8, [%o0 + 0x00]
- be,pn %XCC, endcruft
+ be,pn %XCC, 2f
add %o0, 0x8, %o0
ldd [%o1 + 0x08], %f0
add %o1, 0x8, %o1
* Note that %g1 is (src & 0x7) saved above before the
* alignaddr was performed.
*/
-endcruft:
+2:
cmp %o2, 0
add %o1, %g1, %o1
VISExitHalf
- be,pn %XCC, out
+ be,pn %XCC, 85f
sub %o0, %o1, %o3
andcc %g1, 0x7, %g0
- bne,pn %icc, small_copy_unaligned
+ bne,pn %icc, 90f
andcc %o2, 0x8, %g0
be,pt %icc, 1f
nop
add %o1, 0x2, %o1
1: andcc %o2, 0x1, %g0
- be,pt %icc, out
+ be,pt %icc, 85f
nop
ldub [%o1], %o5
- ba,pt %xcc, out
+ ba,pt %xcc, 85f
stb %o5, [%o1 + %o3]
-medium_copy: /* 16 < len <= 64 */
- bne,pn %XCC, small_copy_unaligned
+70: /* 16 < len <= 64 */
+ bne,pn %XCC, 90f
sub %o0, %o1, %o3
-medium_copy_aligned:
andn %o2, 0x7, %o4
and %o2, 0x7, %o2
1: subcc %o4, 0x8, %o4
stw %o5, [%o1 + %o3]
add %o1, 0x4, %o1
1: cmp %o2, 0
- be,pt %XCC, out
+ be,pt %XCC, 85f
nop
- ba,pt %xcc, small_copy_unaligned
+ ba,pt %xcc, 90f
nop
-small_copy: /* 0 < len <= 16 */
+80: /* 0 < len <= 16 */
andcc %o3, 0x3, %g0
- bne,pn %XCC, small_copy_unaligned
+ bne,pn %XCC, 90f
sub %o0, %o1, %o3
-small_copy_aligned:
+1:
subcc %o2, 4, %o2
lduw [%o1], %g1
stw %g1, [%o1 + %o3]
- bgu,pt %XCC, small_copy_aligned
+ bgu,pt %XCC, 1b
add %o1, 4, %o1
-out: retl
+85: retl
mov %g5, %o0
.align 32
-small_copy_unaligned:
+90:
subcc %o2, 1, %o2
ldub [%o1], %g1
stb %g1, [%o1 + %o3]
- bgu,pt %XCC, small_copy_unaligned
+ bgu,pt %XCC, 90b
add %o1, 1, %o1
retl
mov %g5, %o0