dmp/vortex86ex: Initialize Realtek ALC262 audio codec
[coreboot.git] / src / arch / x86 / lib / memmove.c
1 #include <string.h>
2
3 void *memmove(void *dest, const void *src, size_t n)
4 {
5         int d0,d1,d2,d3,d4,d5;
6         char *ret = dest;
7
8         __asm__ __volatile__(
9                 /* Handle more 16bytes in loop */
10                 "cmp $0x10, %0\n\t"
11                 "jb     1f\n\t"
12
13                 /* Decide forward/backward copy mode */
14                 "cmp %2, %1\n\t"
15                 "jb     2f\n\t"
16
17                 /*
18                  * movs instruction have many startup latency
19                  * so we handle small size by general register.
20                  */
21                 "cmp  $680, %0\n\t"
22                 "jb 3f\n\t"
23                 /*
24                  * movs instruction is only good for aligned case.
25                  */
26                 "mov %1, %3\n\t"
27                 "xor %2, %3\n\t"
28                 "and $0xff, %3\n\t"
29                 "jz 4f\n\t"
30                 "3:\n\t"
31                 "sub $0x10, %0\n\t"
32
33                 /*
34                  * We gobble 16byts forward in each loop.
35                  */
36                 "3:\n\t"
37                 "sub $0x10, %0\n\t"
38                 "mov 0*4(%1), %3\n\t"
39                 "mov 1*4(%1), %4\n\t"
40                 "mov  %3, 0*4(%2)\n\t"
41                 "mov  %4, 1*4(%2)\n\t"
42                 "mov 2*4(%1), %3\n\t"
43                 "mov 3*4(%1), %4\n\t"
44                 "mov  %3, 2*4(%2)\n\t"
45                 "mov  %4, 3*4(%2)\n\t"
46                 "lea  0x10(%1), %1\n\t"
47                 "lea  0x10(%2), %2\n\t"
48                 "jae 3b\n\t"
49                 "add $0x10, %0\n\t"
50                 "jmp 1f\n\t"
51
52                 /*
53                  * Handle data forward by movs.
54                  */
55                 ".p2align 4\n\t"
56                 "4:\n\t"
57                 "mov -4(%1, %0), %3\n\t"
58                 "lea -4(%2, %0), %4\n\t"
59                 "shr $2, %0\n\t"
60                 "rep movsl\n\t"
61                 "mov %3, (%4)\n\t"
62                 "jmp 11f\n\t"
63                 /*
64                  * Handle data backward by movs.
65                  */
66                 ".p2align 4\n\t"
67                 "6:\n\t"
68                 "mov (%1), %3\n\t"
69                 "mov %2, %4\n\t"
70                 "lea -4(%1, %0), %1\n\t"
71                 "lea -4(%2, %0), %2\n\t"
72                 "shr $2, %0\n\t"
73                 "std\n\t"
74                 "rep movsl\n\t"
75                 "mov %3,(%4)\n\t"
76                 "cld\n\t"
77                 "jmp 11f\n\t"
78
79                 /*
80                  * Start to prepare for backward copy.
81                  */
82                 ".p2align 4\n\t"
83                 "2:\n\t"
84                 "cmp  $680, %0\n\t"
85                 "jb 5f\n\t"
86                 "mov %1, %3\n\t"
87                 "xor %2, %3\n\t"
88                 "and $0xff, %3\n\t"
89                 "jz 6b\n\t"
90
91                 /*
92                  * Calculate copy position to tail.
93                  */
94                 "5:\n\t"
95                 "add %0, %1\n\t"
96                 "add %0, %2\n\t"
97                 "sub $0x10, %0\n\t"
98
99                 /*
100                  * We gobble 16byts backward in each loop.
101                  */
102                 "7:\n\t"
103                 "sub $0x10, %0\n\t"
104
105                 "mov -1*4(%1), %3\n\t"
106                 "mov -2*4(%1), %4\n\t"
107                 "mov  %3, -1*4(%2)\n\t"
108                 "mov  %4, -2*4(%2)\n\t"
109                 "mov -3*4(%1), %3\n\t"
110                 "mov -4*4(%1), %4\n\t"
111                 "mov  %3, -3*4(%2)\n\t"
112                 "mov  %4, -4*4(%2)\n\t"
113                 "lea  -0x10(%1), %1\n\t"
114                 "lea  -0x10(%2), %2\n\t"
115                 "jae 7b\n\t"
116                 /*
117                  * Calculate copy position to head.
118                  */
119                 "add $0x10, %0\n\t"
120                 "sub %0, %1\n\t"
121                 "sub %0, %2\n\t"
122
123                 /*
124                  * Move data from 8 bytes to 15 bytes.
125                  */
126                 ".p2align 4\n\t"
127                 "1:\n\t"
128                 "cmp $8, %0\n\t"
129                 "jb 8f\n\t"
130                 "mov 0*4(%1), %3\n\t"
131                 "mov 1*4(%1), %4\n\t"
132                 "mov -2*4(%1, %0), %5\n\t"
133                 "mov -1*4(%1, %0), %1\n\t"
134
135                 "mov  %3, 0*4(%2)\n\t"
136                 "mov  %4, 1*4(%2)\n\t"
137                 "mov  %5, -2*4(%2, %0)\n\t"
138                 "mov  %1, -1*4(%2, %0)\n\t"
139                 "jmp 11f\n\t"
140
141                 /*
142                  * Move data from 4 bytes to 7 bytes.
143                  */
144                 ".p2align 4\n\t"
145                 "8:\n\t"
146                 "cmp $4, %0\n\t"
147                 "jb 9f\n\t"
148                 "mov 0*4(%1), %3\n\t"
149                 "mov -1*4(%1, %0), %4\n\t"
150                 "mov  %3, 0*4(%2)\n\t"
151                 "mov  %4, -1*4(%2, %0)\n\t"
152                 "jmp 11f\n\t"
153
154                 /*
155                  * Move data from 2 bytes to 3 bytes.
156                  */
157                 ".p2align 4\n\t"
158                 "9:\n\t"
159                 "cmp $2, %0\n\t"
160                 "jb 10f\n\t"
161                 "movw 0*2(%1), %%dx\n\t"
162                 "movw -1*2(%1, %0), %%bx\n\t"
163                 "movw %%dx, 0*2(%2)\n\t"
164                 "movw %%bx, -1*2(%2, %0)\n\t"
165                 "jmp 11f\n\t"
166
167                 /*
168                  * Move data for 1 byte.
169                  */
170                 ".p2align 4\n\t"
171                 "10:\n\t"
172                 "cmp $1, %0\n\t"
173                 "jb 11f\n\t"
174                 "movb (%1), %%cl\n\t"
175                 "movb %%cl, (%2)\n\t"
176                 ".p2align 4\n\t"
177                 "11:"
178                 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
179                   "=r" (d3),"=r" (d4), "=r"(d5)
180                 :"0" (n),
181                  "1" (src),
182                  "2" (dest)
183                 :"memory");
184
185         return ret;
186
187 }