1 /* mpn_mul_basecase for IBM z13 and later -- Internal routine to multiply two
2 natural numbers of length m and n.
3
4 THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
5 SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6
7 Copyright 2021 Free Software Foundation, Inc.
8
9 This file is part of the GNU MP Library.
10
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of either:
13
14 * the GNU Lesser General Public License as published by the Free
15 Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17
18 or
19
20 * the GNU General Public License as published by the Free Software
21 Foundation; either version 2 of the License, or (at your option) any
22 later version.
23
24 or both in parallel, as here.
25
26 The GNU MP Library is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
29 for more details.
30
31 You should have received copies of the GNU General Public License and the
32 GNU Lesser General Public License along with the GNU MP Library. If not,
33 see https://www.gnu.org/licenses/. */
34
35 #include <stdlib.h>
36
37 #include "gmp-impl.h"
38
39 /* Note: we explicitly inline all mul and addmul routines here to reduce the
40 * number of branches in prologues of unrolled functions. That comes at the
41 cost of duplicating common loop bodies in object code. */
42 #define DO_INLINE
43
/*
 * Tweak the loop conditions in the addmul subroutines to enable the use of
 * branch-relative-on-count (BRCTG) instructions, which currently results in
 * better performance.
 */
49 #define BRCTG
50
51 #include "s390_64/z13/common-vec.h"
52
53 #define OPERATION_mul_1
54 #include "s390_64/z13/addmul_1.c"
55 #undef OPERATION_mul_1
56
57 #define OPERATION_addmul_1
58 #include "s390_64/z13/addmul_1.c"
59 #undef OPERATION_addmul_1
60
61 #define OPERATION_mul_2
62 #include "s390_64/z13/aormul_2.c"
63 #undef OPERATION_mul_2
64
65 #define OPERATION_addmul_2
66 #include "s390_64/z13/aormul_2.c"
67 #undef OPERATION_addmul_2
68
69 void
70 mpn_mul_basecase (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp,
71 mp_size_t vn)
72 {
73 ASSERT (un >= vn);
74 ASSERT (vn >= 1);
75 ASSERT (!MPN_OVERLAP_P (rp, un + vn, up, un));
76 ASSERT (!MPN_OVERLAP_P (rp, un + vn, vp, vn));
77
78 /* The implementations of (add)mul_1/2 are 4x-unrolled. Pull out the branch
79 * for un%4 and inline specific variants. */
80
81 #define BRANCH_FOR_MOD(N) \
82 do \
83 { \
84 if (vn >= 2) \
85 { \
86 rp[un + 1] = inline_mul_2 (rp, up, un, vp); \
87 rp += 2, vp += 2, vn -= 2; \
88 } \
89 else \
90 { \
91 rp[un] = inline_mul_1 (rp, up, un, vp[0]); \
92 return; \
93 } \
94 \
95 while (vn >= 2) \
96 { \
97 rp[un + 2 - 1] = inline_addmul_2 (rp, up, un, vp); \
98 rp += 2, vp += 2, vn -= 2; \
99 } \
100 \
101 while (vn >= 1) \
102 { \
103 rp[un] = inline_addmul_1 (rp, up, un, vp[0]); \
104 rp += 1, vp += 1, vn -= 1; \
105 } \
106 } \
107 while (0);
108
109 switch (((size_t)un) % 4)
110 {
111 case 0:
112 BRANCH_FOR_MOD (0);
113 break;
114 case 1:
115 BRANCH_FOR_MOD (1);
116 break;
117 case 2:
118 BRANCH_FOR_MOD (2);
119 break;
120 case 3:
121 BRANCH_FOR_MOD (3);
122 break;
123 }
124 }