summaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/crypto/poly1305/sum_s390x.s
blob: 806d1694b041a42724dbe23b3b931bfd6ef1c8c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build go1.11,!gccgo,!purego

#include "textflag.h"

// Implementation of Poly1305 using the vector facility (vx).

// constants
#define MOD26 V0
#define EX0   V1
#define EX1   V2
#define EX2   V3

// temporaries
#define T_0 V4
#define T_1 V5
#define T_2 V6
#define T_3 V7
#define T_4 V8

// key (r)
#define R_0  V9
#define R_1  V10
#define R_2  V11
#define R_3  V12
#define R_4  V13
#define R5_1 V14
#define R5_2 V15
#define R5_3 V16
#define R5_4 V17
#define RSAVE_0 R5
#define RSAVE_1 R6
#define RSAVE_2 R7
#define RSAVE_3 R8
#define RSAVE_4 R9
#define R5SAVE_1 V28
#define R5SAVE_2 V29
#define R5SAVE_3 V30
#define R5SAVE_4 V31

// message block
#define F_0 V18
#define F_1 V19
#define F_2 V20
#define F_3 V21
#define F_4 V22

// accumulator
#define H_0 V23
#define H_1 V24
#define H_2 V25
#define H_3 V26
#define H_4 V27

GLOBL ·keyMask<>(SB), RODATA, $16
DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f
DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f

GLOBL ·bswapMask<>(SB), RODATA, $16
DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908
DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100

GLOBL ·constants<>(SB), RODATA, $64
// MOD26
DATA ·constants<>+0(SB)/8, $0x3ffffff
DATA ·constants<>+8(SB)/8, $0x3ffffff
// EX0
DATA ·constants<>+16(SB)/8, $0x0006050403020100
DATA ·constants<>+24(SB)/8, $0x1016151413121110
// EX1
DATA ·constants<>+32(SB)/8, $0x060c0b0a09080706
DATA ·constants<>+40(SB)/8, $0x161c1b1a19181716
// EX2
DATA ·constants<>+48(SB)/8, $0x0d0d0d0d0d0f0e0d
DATA ·constants<>+56(SB)/8, $0x1d1d1d1d1d1f1e1d

// h = (f*g) % (2**130-5) [partial reduction]
#define MULTIPLY(f0, f1, f2, f3, f4, g0, g1, g2, g3, g4, g51, g52, g53, g54, h0, h1, h2, h3, h4) \
	VMLOF  f0, g0, h0        \
	VMLOF  f0, g1, h1        \
	VMLOF  f0, g2, h2        \
	VMLOF  f0, g3, h3        \
	VMLOF  f0, g4, h4        \
	VMLOF  f1, g54, T_0      \
	VMLOF  f1, g0, T_1       \
	VMLOF  f1, g1, T_2       \
	VMLOF  f1, g2, T_3       \
	VMLOF  f1, g3, T_4       \
	VMALOF f2, g53, h0, h0   \
	VMALOF f2, g54, h1, h1   \
	VMALOF f2, g0, h2, h2    \
	VMALOF f2, g1, h3, h3    \
	VMALOF f2, g2, h4, h4    \
	VMALOF f3, g52, T_0, T_0 \
	VMALOF f3, g53, T_1, T_1 \
	VMALOF f3, g54, T_2, T_2 \
	VMALOF f3, g0, T_3, T_3  \
	VMALOF f3, g1, T_4, T_4  \
	VMALOF f4, g51, h0, h0   \
	VMALOF f4, g52, h1, h1   \
	VMALOF f4, g53, h2, h2   \
	VMALOF f4, g54, h3, h3   \
	VMALOF f4, g0, h4, h4    \
	VAG    T_0, h0, h0       \
	VAG    T_1, h1, h1       \
	VAG    T_2, h2, h2       \
	VAG    T_3, h3, h3       \
	VAG    T_4, h4, h4

// carry h0->h1 h3->h4, h1->h2 h4->h0, h0->h1 h2->h3, h3->h4
#define REDUCE(h0, h1, h2, h3, h4) \
	VESRLG $26, h0, T_0  \
	VESRLG $26, h3, T_1  \
	VN     MOD26, h0, h0 \
	VN     MOD26, h3, h3 \
	VAG    T_0, h1, h1   \
	VAG    T_1, h4, h4   \
	VESRLG $26, h1, T_2  \
	VESRLG $26, h4, T_3  \
	VN     MOD26, h1, h1 \
	VN     MOD26, h4, h4 \
	VESLG  $2, T_3, T_4  \
	VAG    T_3, T_4, T_4 \
	VAG    T_2, h2, h2   \
	VAG    T_4, h0, h0   \
	VESRLG $26, h2, T_0  \
	VESRLG $26, h0, T_1  \
	VN     MOD26, h2, h2 \
	VN     MOD26, h0, h0 \
	VAG    T_0, h3, h3   \
	VAG    T_1, h1, h1   \
	VESRLG $26, h3, T_2  \
	VN     MOD26, h3, h3 \
	VAG    T_2, h4, h4

// expand in0 into d[0] and in1 into d[1]
#define EXPAND(in0, in1, d0, d1, d2, d3, d4) \
	VGBM   $0x0707, d1       \ // d1=tmp
	VPERM  in0, in1, EX2, d4 \
	VPERM  in0, in1, EX0, d0 \
	VPERM  in0, in1, EX1, d2 \
	VN     d1, d4, d4        \
	VESRLG $26, d0, d1       \
	VESRLG $30, d2, d3       \
	VESRLG $4, d2, d2        \
	VN     MOD26, d0, d0     \
	VN     MOD26, d1, d1     \
	VN     MOD26, d2, d2     \
	VN     MOD26, d3, d3

// pack h4:h0 into h1:h0 (no carry)
#define PACK(h0, h1, h2, h3, h4) \
	VESLG $26, h1, h1  \
	VESLG $26, h3, h3  \
	VO    h0, h1, h0   \
	VO    h2, h3, h2   \
	VESLG $4, h2, h2   \
	VLEIB $7, $48, h1  \
	VSLB  h1, h2, h2   \
	VO    h0, h2, h0   \
	VLEIB $7, $104, h1 \
	VSLB  h1, h4, h3   \
	VO    h3, h0, h0   \
	VLEIB $7, $24, h1  \
	VSRLB h1, h4, h1

// if h > 2**130-5 then h -= 2**130-5
#define MOD(h0, h1, t0, t1, t2) \
	VZERO t0          \
	VLEIG $1, $5, t0  \
	VACCQ h0, t0, t1  \
	VAQ   h0, t0, t0  \
	VONE  t2          \
	VLEIG $1, $-4, t2 \
	VAQ   t2, t1, t1  \
	VACCQ h1, t1, t1  \
	VONE  t2          \
	VAQ   t2, t1, t1  \
	VN    h0, t1, t2  \
	VNC   t0, t1,