blob: ba121278dae631694916ff78062e28618da7a7b3 [file] [log] [blame]
Gabe Blackccdc0052013-07-07 14:08:30 -07001#include <string.h>
2
/*
 * memmove - copy n bytes from src to dest; the two regions may overlap.
 *
 * @dest: start of the destination region
 * @src:  start of the source region
 * @n:    number of bytes to copy
 *
 * Returns dest.
 *
 * On i386 the copy is done by hand-written assembly:
 *   - fewer than 16 bytes are moved by the tail code at labels 1, 8, 9, 10,
 *     which loads both ends of the region before storing anything;
 *   - src >= dest copies forward, src < dest copies backward;
 *   - copies of 680 bytes or more whose src and dest agree in their low
 *     8 address bits use "rep movsl", everything else uses a 16-byte
 *     general-register loop followed by the tail code.
 *
 * The assembly binds the pointers and the length to int-sized operands, so
 * it is only valid on i386; any other target uses the plain C loop below.
 */
void *memmove(void *dest, const void *src, size_t n)
{
#if defined(__i386__)
	int d0, d1, d2, d3, d4, d5;
	char *ret = dest;

	/* Operands: %0 = count (ecx), %1 = src (esi), %2 = dest (edi),
	 * %3, %4, %5 = scratch registers chosen by the compiler. */
	__asm__ __volatile__(
		/* Fewer than 16 bytes: go straight to the tail code. */
		"cmp $0x10, %0\n\t"
		"jb 1f\n\t"

		/* Decide forward/backward copy mode: src < dest goes backward. */
		"cmp %2, %1\n\t"
		"jb 2f\n\t"

		/*
		 * The movs instruction has a high startup latency,
		 * so small sizes are handled with general registers.
		 */
		"cmp $680, %0\n\t"
		"jb 3f\n\t"
		/*
		 * The movs instruction is only good for the aligned case:
		 * use it when src and dest agree in their low 8 address bits.
		 */
		"mov %1, %3\n\t"
		"xor %2, %3\n\t"
		"and $0xff, %3\n\t"
		"jz 4f\n\t"
		/* Bias the count by -16 once before entering the loop. */
		"3:\n\t"
		"sub $0x10, %0\n\t"

		/*
		 * Copy 16 bytes forward per iteration. Label 3 is deliberately
		 * defined twice: "3f" above reaches the bias step, "3b" below
		 * reaches this loop head. lea leaves the flags alone, so jae
		 * still tests the borrow of the sub.
		 */
		"3:\n\t"
		"sub $0x10, %0\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov 1*4(%1), %4\n\t"
		"mov %3, 0*4(%2)\n\t"
		"mov %4, 1*4(%2)\n\t"
		"mov 2*4(%1), %3\n\t"
		"mov 3*4(%1), %4\n\t"
		"mov %3, 2*4(%2)\n\t"
		"mov %4, 3*4(%2)\n\t"
		"lea 0x10(%1), %1\n\t"
		"lea 0x10(%2), %2\n\t"
		"jae 3b\n\t"
		/* Undo the bias; 0..15 bytes are left for the tail code. */
		"add $0x10, %0\n\t"
		"jmp 1f\n\t"

		/*
		 * Forward copy by movs: save the last source dword first,
		 * copy n/4 dwords, then store the saved dword over the end
		 * of dest to cover the n%4 bytes movsl did not reach.
		 */
		".p2align 4\n\t"
		"4:\n\t"
		"mov -4(%1, %0), %3\n\t"
		"lea -4(%2, %0), %4\n\t"
		"shr $2, %0\n\t"
		"rep movsl\n\t"
		"mov %3, (%4)\n\t"
		"jmp 11f\n\t"
		/*
		 * Backward copy by movs: save the first source dword and the
		 * dest start, copy n/4 dwords downward from the end with the
		 * direction flag set, then store the saved dword at the start
		 * and clear the direction flag again.
		 */
		".p2align 4\n\t"
		"6:\n\t"
		"mov (%1), %3\n\t"
		"mov %2, %4\n\t"
		"lea -4(%1, %0), %1\n\t"
		"lea -4(%2, %0), %2\n\t"
		"shr $2, %0\n\t"
		"std\n\t"
		"rep movsl\n\t"
		"mov %3,(%4)\n\t"
		"cld\n\t"
		"jmp 11f\n\t"

		/*
		 * Backward copy: same size and alignment test as the
		 * forward case.
		 */
		".p2align 4\n\t"
		"2:\n\t"
		"cmp $680, %0\n\t"
		"jb 5f\n\t"
		"mov %1, %3\n\t"
		"xor %2, %3\n\t"
		"and $0xff, %3\n\t"
		"jz 6b\n\t"

		/*
		 * Point src and dest at the end of their regions and bias
		 * the count by -16.
		 */
		"5:\n\t"
		"add %0, %1\n\t"
		"add %0, %2\n\t"
		"sub $0x10, %0\n\t"

		/*
		 * Copy 16 bytes backward per iteration.
		 */
		"7:\n\t"
		"sub $0x10, %0\n\t"

		"mov -1*4(%1), %3\n\t"
		"mov -2*4(%1), %4\n\t"
		"mov %3, -1*4(%2)\n\t"
		"mov %4, -2*4(%2)\n\t"
		"mov -3*4(%1), %3\n\t"
		"mov -4*4(%1), %4\n\t"
		"mov %3, -3*4(%2)\n\t"
		"mov %4, -4*4(%2)\n\t"
		"lea -0x10(%1), %1\n\t"
		"lea -0x10(%2), %2\n\t"
		"jae 7b\n\t"
		/*
		 * Undo the bias and move src and dest back to the head of
		 * the 0..15 bytes that are still to be copied.
		 */
		"add $0x10, %0\n\t"
		"sub %0, %1\n\t"
		"sub %0, %2\n\t"

		/*
		 * Move 8 to 15 bytes: load the first and last 8 bytes, then
		 * store them. %1 is reused as a scratch for the final load.
		 */
		".p2align 4\n\t"
		"1:\n\t"
		"cmp $8, %0\n\t"
		"jb 8f\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov 1*4(%1), %4\n\t"
		"mov -2*4(%1, %0), %5\n\t"
		"mov -1*4(%1, %0), %1\n\t"

		"mov %3, 0*4(%2)\n\t"
		"mov %4, 1*4(%2)\n\t"
		"mov %5, -2*4(%2, %0)\n\t"
		"mov %1, -1*4(%2, %0)\n\t"
		"jmp 11f\n\t"

		/*
		 * Move 4 to 7 bytes.
		 */
		".p2align 4\n\t"
		"8:\n\t"
		"cmp $4, %0\n\t"
		"jb 9f\n\t"
		"mov 0*4(%1), %3\n\t"
		"mov -1*4(%1, %0), %4\n\t"
		"mov %3, 0*4(%2)\n\t"
		"mov %4, -1*4(%2, %0)\n\t"
		"jmp 11f\n\t"

		/*
		 * Move 2 to 3 bytes. Use the 16-bit halves of the allocated
		 * scratch operands (%w3, %w4): hard-coded %dx/%bx are neither
		 * operands nor clobbers, so the compiler may be keeping live
		 * values in them.
		 */
		".p2align 4\n\t"
		"9:\n\t"
		"cmp $2, %0\n\t"
		"jb 10f\n\t"
		"movw 0*2(%1), %w3\n\t"
		"movw -1*2(%1, %0), %w4\n\t"
		"movw %w3, 0*2(%2)\n\t"
		"movw %w4, -1*2(%2, %0)\n\t"
		"jmp 11f\n\t"

		/*
		 * Move a single byte. %cl is the low byte of the count
		 * operand (%0, pinned to ecx), which is no longer needed.
		 */
		".p2align 4\n\t"
		"10:\n\t"
		"cmp $1, %0\n\t"
		"jb 11f\n\t"
		"movb (%1), %%cl\n\t"
		"movb %%cl, (%2)\n\t"
		".p2align 4\n\t"
		"11:"
		: "=&c" (d0), "=&S" (d1), "=&D" (d2),
		  "=r" (d3), "=r" (d4), "=r" (d5)
		: "0" (n),
		  "1" (src),
		  "2" (dest)
		: "memory");

	return ret;
#else
	unsigned char *d = dest;
	const unsigned char *s = src;

	/*
	 * With unsigned wrap-around, (d - s) >= n holds exactly when dest
	 * lies below src or at/after the end of the source region; in both
	 * cases a forward copy never overwrites source bytes it still needs.
	 */
	if ((size_t)d - (size_t)s >= n) {
		while (n--)
			*d++ = *s++;
	} else {
		d += n;
		s += n;
		while (n--)
			*--d = *--s;
	}

	return dest;
#endif
}
187}