blob: 0c1102deb4c1f71e1e5c52df39b7b012047f2acf [file] [log] [blame]
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
Julius Wernerd65e2142013-12-13 12:59:57 -080012
13#include <arch/asm.h>
14#include "asmlib.h"
Stefan Reinauer52db0b92012-12-07 17:15:04 -080015
/*
 * memset - fill a block of memory with a byte value.
 * C equivalent: void *memset(void *s, int c, size_t n)
 *
 * In:    r0 = destination pointer
 *        r1 = fill value (only the low 8 bits are used)
 *        r2 = number of bytes to set
 * Out:   r0 = destination pointer, exactly as passed in
 * Uses:  r1, r2, r3, ip and the condition flags as scratch.
 *        r8 (or r4-r8 in the cache-line variant) and lr are borrowed as
 *        extra store registers; they are pushed before use and popped
 *        again before the function returns.
 *
 * ip is the running store pointer so that r0 survives as the return
 * value.  Once the remaining count drops below the unit of the current
 * stage, r2 is no longer corrected: subtracting a multiple of 64 (or of
 * 4 on the unaligned entry path) leaves the low bits intact, and only
 * those bits are tested from then on.
 *
 * Numeric labels:
 *   1: destination is word aligned, build the fill word
 *   2:/3: bulk 64-byte store loop (plain / cache-line variant)
 *   4: 0..15 bytes left, destination word aligned
 *   5: 0..3 bytes left, destination possibly unaligned
 *   6: entry path for a destination that is not word aligned
 */
ENTRY(memset)
	and	r1, r1, #255		@ use c as an unsigned char; without
					@ this, high bits of c would leak into
					@ the replicated fill word below
	ands	r3, r0, #3		@ r3 = dest & 3; Z clear if unaligned
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ unaligned: store head bytes first
/*
 * We know that the pointer in ip is aligned to a word boundary.
 * Replicate the fill byte into all four bytes of r1, and mirror it in r3.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
	cmp	r2, #16
	blt	4f			@ fewer than 16 bytes: tail only

/*
 * CALGN comes from asmlib.h, which is not visible here.
 * NOTE(review): presumably it expands to its argument only when
 * cache-line alignment is wanted, making this test select the plain
 * variant otherwise - confirm against asmlib.h.
 */
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ count hit exactly 0: restore and
					@ return.  Otherwise <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}	@ 32 bytes (2 x 16)
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}	@ 16 bytes
	ldmfd	sp!, {r8, lr}		@ restore; fall through to the tail

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31			@ only if count > 96: 32-byte aligned?
	ble	3f			@ small count or already aligned

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to the next 32-byte
					@ boundary (a multiple of 4)
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4 (16), N = bit 3 (8) of r8
	stmcsia	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmmiia	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)		@ bit 2 (4) of the original r8
	mov	r8, r1			@ r8 is a fill register again (mov
					@ leaves the flags from tst intact)
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}	@ 64 bytes at a time (2 x 32)
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}	@ count hit exactly 0: restore and
					@ return.  Otherwise <64 bytes to go.

	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}	@ 32 bytes
	tst	r2, #16
	stmneia	ip!, {r4-r7}		@ 16 bytes
	ldmfd	sp!, {r4-r8, lr}	@ restore; fall through to the tail

#endif

4:	tst	r2, #8
	stmneia	ip!, {r1, r3}		@ 8 bytes
	tst	r2, #4
	strne	r1, [ip], #4		@ 4 bytes
/*
 * When we get here, we've got less than 4 bytes left to set. We
 * may have an unaligned pointer as well, so only byte stores are used.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1		@ 2 bytes
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1		@ 1 byte
	mov	pc, lr			@ return; r0 still holds the dest

/*
 * Unaligned destination: r3 = dest & 3 (1..3).  Store 4 - r3 single
 * bytes to reach a word boundary, then rejoin the aligned path.
 */
6:	subs	r2, r2, #4		@ do we have enough
	blt	5b			@ bytes to align with?  If not, the
					@ low 2 bits of r2 still hold the count
	cmp	r3, #2
	strltb	r1, [ip], #1		@ r3 == 1: three bytes in total
	strleb	r1, [ip], #1		@ r3 <= 2: at least two bytes
	strb	r1, [ip], #1		@ always at least one byte
	add	r2, r2, r3		@ (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)