1 """
2 Lib/ctypes.util.find_library() support for AIX
3 Similar approach as done for Darwin support by using separate files
4 but unlike Darwin - no extension such as ctypes.macholib.*
5
6 dlopen() is an interface to AIX initAndLoad() - primary documentation at:
7 https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/dlopen.htm
8 https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/load.htm
9
10 AIX supports two styles for dlopen(): svr4 (System V Release 4) which is common on posix
11 platforms, but also a BSD style - aka SVR3.
12
13 From AIX 5.3 Difference Addendum (December 2004)
14 2.9 SVR4 linking affinity
15 Nowadays, there are two major object file formats used by the operating systems:
16 XCOFF: The COFF enhanced by IBM and others. The original COFF (Common
17 Object File Format) was the base of SVR3 and BSD 4.2 systems.
18 ELF: Executable and Linking Format that was developed by AT&T and is a
19 base for SVR4 UNIX.
20
21 While the shared library content is identical on AIX - one is located as a filepath name
22 (svr4 style) and the other is located as a member of an archive (and the archive
23 is located as a filepath name).
24
25 The key difference arises when supporting multiple abi formats (i.e., 32 and 64 bit).
26 For svr4 either only one ABI is supported, or there are two directories, or there
27 are different file names. The most common solution for multiple ABI is multiple
28 directories.
29
30 For the XCOFF (aka AIX) style - one directory (one archive file) is sufficient
31 as multiple shared libraries can be in the archive - even sharing the same name.
32 In documentation the archive is also referred to as the "base" and the shared
33 library object is referred to as the "member".
34
35 For dlopen() on AIX (read initAndLoad()) the calls are similar.
36 Default activity occurs when no path information is provided. When path
37 information is provided dlopen() does not search any other directories.
38
39 For SVR4 - the shared library name is the name of the file expected: libFOO.so
40 For AIX - the shared library is expressed as base(member). The search is for the
41 base (e.g., libFOO.a) and once the base is found the shared library - identified by
42 member (e.g., libFOO.so, or shr.o) is located and loaded.
43
44 The mode bit RTLD_MEMBER tells initAndLoad() that it needs to use the AIX (SVR3)
45 naming style.
46 """
47 __author__ = "Michael Felt <aixtools@felt.demon.nl>"
48
49 import re
50 from os import environ, path
51 from sys import executable
52 from ctypes import c_void_p, sizeof
53 from subprocess import Popen, PIPE, DEVNULL
54
# Pointer width of the running interpreter, in bits - 32 or 64.
# Handed to /usr/bin/dump as -X32 / -X64 to restrict the archive search
# to objects of the matching size.
AIX_ABI = 8 * sizeof(c_void_p)
58
59
60 from sys import maxsize
61 def _last_version(libnames, sep):
62 def _num_version(libname):
63 # "libxyz.so.MAJOR.MINOR" => [MAJOR, MINOR]
64 parts = libname.split(sep)
65 nums = []
66 try:
67 while parts:
68 nums.insert(0, int(parts.pop()))
69 except ValueError:
70 pass
71 return nums or [maxsize]
72 return max(reversed(libnames), key=_num_version)
73
def get_ld_header(p):
    """Advance through "dump -H" output to the next loader header.

    Helper for get_ld_headers(), kept at module level.

    p is an object whose stdout attribute yields text lines.  A line that
    starts with "/", "./" or "../" is remembered as the candidate header;
    the first later line containing "INDEX" ends the scan, leaving the
    stream positioned on the line that follows it.

    Returns the remembered line without its trailing newline, or None
    when the output is exhausted first.
    """
    ld_header = None
    for line in p.stdout:
        if line.startswith(('/', './', '../')):
            ld_header = line
        elif "INDEX" in line and ld_header is not None:
            # an "INDEX" line with no preceding path line is skipped:
            # there is no header to report for it
            return ld_header.rstrip('\n')
    return None
83
def get_ld_header_info(p):
    """Collect the rows that follow a loader header in "dump -H" output.

    Helper for get_ld_headers(), kept at module level.  It is called once
    an ld_header has been found; the rows holding the known paths,
    archives and members start with a digit.

    Lines are read from p.stdout until one does not start with a digit.
    That terminating line (normally the blank separator) is consumed
    and discarded.  Returns the collected lines, newlines included.
    """
    rows = []
    for row in p.stdout:
        if not re.match("[0-9]", row):
            # separator reached - it has been consumed, stop here
            break
        rows.append(row)
    return rows
96
def get_ld_headers(file):
    """
    Parse the header of the loader section of executable and archives
    This function calls /usr/bin/dump -H as a subprocess
    and returns a list of (ld_header, ld_header_info) tuples.

    file is the path handed to dump; the -X option restricts the output
    to objects matching AIX_ABI.
    """
    # get_ld_headers parsing:
    # 1. Find a line that starts with /, ./, or ../ - set as ld_header
    # 2. If "INDEX" occurs in a following line - return ld_header
    # 3. get info (lines starting with [0-9])
    ldr_headers = []
    # the context manager closes the pipe and waits for the child even
    # when parsing raises, so no descriptor or process is left behind
    with Popen(["/usr/bin/dump", f"-X{AIX_ABI}", "-H", file],
               universal_newlines=True, stdout=PIPE, stderr=DEVNULL) as p:
        # be sure to read to the end-of-file - getting all entries
        while ld_header := get_ld_header(p):
            ldr_headers.append((ld_header, get_ld_header_info(p)))
    return ldr_headers
116
def get_shared(ld_headers):
    """
    Pull the shareable object names out of ld_headers.

    ld_headers is a sequence of (header, info) pairs; only the header is
    examined.  Headers without a "[" are not members and are dropped.
    For the others everything before the "[" (the path information) and
    the final character (the trailing colon) is cut away.
    Note: the "[" and "]" characters that are part of dump -H output
    are kept in the result.
    """
    return [header[header.index("["):-1]
            for header, _ in ld_headers
            if "[" in header]
132
def get_one_match(expr, lines):
    """
    Return the text matched by expr when exactly one of lines matches.

    Member names in the ld_headers output sit between square brackets,
    so expr is searched for as "[expr]"; the value returned is the part
    inside the brackets.  Zero matches or more than one match give None.
    """
    bracketed = rf'\[({expr})\]'
    hits = [hit
            for hit in (re.search(bracketed, text) for text in lines)
            if hit]
    if len(hits) != 1:
        return None
    return hits[0].group(1)
145
# additional processing to deal with AIX legacy member names
def get_legacy(members):
    """
    Look up a member using the historical (legacy) naming schemes that
    date back to AIX4 shared library support.
    e.g., in /usr/lib/libc.a the member name shr.o for 32-bit binary and
    shr_64.o for 64-bit binary.

    Returns the member name, or None when no legacy name is matched.
    """
    if AIX_ABI == 64:
        # AIX 64-bit member is one of shr64.o, shr_64.o, or shr4_64.o
        patterns = [r'shr4?_?64\.o']
    else:
        # 32-bit legacy names - both shr.o and shr4.o exist.
        # shr.o is the preferred name so it is tried first,
        # i.e., shr4.o is returned only when shr.o does not exist
        patterns = [re.escape(legacy) for legacy in ('shr.o', 'shr4.o')]
    for pattern in patterns:
        found = get_one_match(pattern, members)
        if found:
            return found
    return None
169
def get_version(name, members):
    """
    Return the highest numbered versioned member for name, or None.
    This function is called when an unversioned libFOO.a(libFOO.so) has
    not been found.

    Versioning for the member name is expected to follow
    GNU LIBTOOL conventions: the highest version (X, then X.Y, then X.Y.Z)
     * find [libFoo.so.X]
     * find [libFoo.so.X.Y]
     * find [libFoo.so.X.Y.Z]

    Before the GNU convention became the standard scheme regardless of
    binary size, AIX packagers used the GNU convention "as-is" for 32-bit
    archive members but used a distinguishing name for 64-bit members.
    This scheme inserted either 64 or _64 between libFOO and .so
    - generally libFOO_64.so, but occasionally libFOO64.so

    name is interpolated into a regular expression as given.
    """
    # A version suffix must begin with '.so.' followed by at least one
    # digit.  Rather than spelling out .so.X, .so.X.Y and .so.X.Y.Z as
    # separate expressions, any mix of further digits and dots is accepted
    # after that first digit.  Anything beyond
    # libFOO.so.digits.digits.digits should be seen as a member name
    # outside normal expectations.
    patterns = (rf'lib{name}\.so\.[0-9]+[0-9.]*',
                rf'lib{name}_?64\.so\.[0-9]+[0-9.]*')
    for pattern in patterns:
        candidates = [hit.group(0)
                      for hit in (re.search(pattern, entry) for entry in members)
                      if hit]
        # the plain GNU form wins; the 64-bit spelling is only tried
        # when the first expression matched nothing
        if candidates:
            return _last_version(candidates, '.')
    return None
207
def get_member(name, members):
    """
    Return an archive member matching the request in name.
    Name is the library name without any prefix like lib, suffix like .so,
    or version number.
    Given a list of members find and return the most appropriate result.
    Priority is given to generic libXXX.so (and, for a 64-bit
    interpreter, libXXX64.so), then a versioned libXXX.so.a.b.c
    and finally, the legacy AIX naming scheme.
    """
    # look first for a generic match - prepend lib and append .so
    found = get_one_match(rf'lib{name}\.so', members)
    if not found and AIX_ABI == 64:
        found = get_one_match(rf'lib{name}64\.so', members)
    if found:
        return found
    # since an exact match with .so as suffix was not found
    # look for a versioned name and, failing that,
    # for an AIX legacy member name
    return get_version(name, members) or get_legacy(members)
235
def get_libpaths():
    """
    Build the list of directories to search for libraries.

    On AIX, the buildtime searchpath is stored in the executable
    as "loader header information".
    The command /usr/bin/dump -H extracts this info.

    The list starts with the directories from LD_LIBRARY_PATH
    (preferred), or from LIBPATH when LD_LIBRARY_PATH is not set.
    The search paths recorded in the python executable are appended
    after them.
    This mimics AIX dlopen() behavior.
    """
    env_paths = environ.get("LD_LIBRARY_PATH")
    if env_paths is None:
        env_paths = environ.get("LIBPATH")
    libpaths = [] if env_paths is None else env_paths.split(":")

    for _, lines in get_ld_headers(executable):
        for line in lines:
            fields = line.split()
            # the second (optional) field is PATH if it includes a /;
            # a row holding nothing but its index has no PATH to add
            if len(fields) > 1 and "/" in fields[1]:
                libpaths.extend(fields[1].split(":"))
    return libpaths
261
def find_shared(paths, name):
    """
    paths is a list of directories to search for an archive.
    name is the abbreviated name given to find_library().
    Process: search "paths" for the archive lib<name>.a; the first one
    that exists decides the outcome via get_member().
    Returns (archive file name, member), or (None, None) when no archive
    exists or the first archive found has no suitable member.
    """
    # "lib" is prefixed to emulate compiler name resolution,
    # e.g., -lc to libc
    base = f'lib{name}.a'
    for directory in paths:
        # /lib is a symbolic link to /usr/lib, skip it
        if directory == "/lib":
            continue
        archive = path.join(directory, base)
        if not path.exists(archive):
            continue
        members = get_shared(get_ld_headers(archive))
        member = get_member(re.escape(name), members)
        # no further directories are tried once an archive was found
        return (None, None) if member is None else (base, member)
    return (None, None)
286
def find_library(name):
    """AIX implementation of ctypes.util.find_library()
    Find an archive member that will dlopen(). If not available,
    also search for a file (or link) with a .so suffix.

    AIX supports two types of schemes that can be used with dlopen().
    The so-called SystemV Release4 (svr4) format is commonly suffixed
    with .so while the (default) AIX scheme has the library (archive)
    ending with the suffix .a
    As an archive has multiple members (e.g., 32-bit and 64-bit) in one file
    the argument passed to dlopen must include both the library and
    the member names in a single string.

    find_library() looks first for an archive (.a) with a suitable member
    and returns it as "base(member)".
    If no archive+member pair is found, it looks for lib<name>.so and
    returns that file name.  None is returned when neither exists.
    """
    search_dirs = get_libpaths()
    archive, member = find_shared(search_dirs, name)
    if archive is not None:
        return f"{archive}({member})"

    # To get here, a member in an archive has not been found
    # In other words, either:
    # a) a .a file was not found
    # b) a .a file did not have a suitable member
    # So, look for a .so file in the same directories
    # Note, the installation must prepare a link from a .so
    # to a versioned file
    # This is common practice by GNU libtool on other platforms
    soname = f"lib{name}.so"
    # /lib is a symbolic link to /usr/lib, skip it
    candidates = (path.join(directory, soname)
                  for directory in search_dirs
                  if directory != "/lib")
    if any(path.exists(candidate) for candidate in candidates):
        return soname
    # if we are here, we have not found anything plausible
    return None