1 """Routines to help recognizing sound files.
2
3 Function whathdr() recognizes various types of sound file headers.
4 It understands almost all headers that SOX can decode.
5
6 The return tuple contains the following items, in this order:
7 - file type (as SOX understands it)
8 - sampling rate (0 if unknown or hard to decode)
9 - number of channels (0 if unknown or hard to decode)
10 - number of frames in the file (-1 if unknown or hard to decode)
11 - number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
12
13 If the file doesn't have a recognizable type, it returns None.
14 If the file can't be opened, OSError is raised.
15
16 To compute the total time, divide the number of frames by the
17 sampling rate (a frame contains a sample for each channel).
18
19 Function what() calls whathdr(). (It used to also use some
20 heuristics for raw data, but this doesn't work very well.)
21
22 Finally, the function test() is a simple main program that calls
23 what() for all files mentioned on the argument list. For directory
24 arguments it calls what() for all files in that directory. Default
25 argument is "." (testing all files in the current directory). The
26 option -r tells it to recurse down directories found inside
27 explicitly given directories.
28 """
29
30 import warnings
31
# Flag this module as deprecated as soon as it is imported.
# NOTE(review): `remove=(3, 13)` presumably names the Python version in which
# the module is slated for removal -- confirm against warnings._deprecated.
warnings._deprecated(__name__, remove=(3, 13))

# The file structure is top-down except that the test program and its
# subroutine come last.

# Public API: only the two recognizer entry points are exported.
__all__ = ['what', 'whathdr']
38
39 from collections import namedtuple
40
# Result record produced by whathdr(): one field per decoded header property,
# in the order the per-format recognizers return them.
SndHeaders = namedtuple(
    'SndHeaders',
    ['filetype', 'framerate', 'nchannels', 'nframes', 'sampwidth'],
)

# Per-field documentation, attached after the fact so help() shows something
# more useful than the generic "Alias for field number N".
SndHeaders.filetype.__doc__ = (
    "The value for type indicates the data type\n"
    "and will be one of the strings 'aifc', 'aiff', 'au','hcom',\n"
    "'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'."
)
SndHeaders.framerate.__doc__ = (
    "The sampling_rate will be either the actual\n"
    "value or 0 if unknown or difficult to decode."
)
SndHeaders.nchannels.__doc__ = (
    "The number of channels or 0 if it cannot be\n"
    "determined or if the value is difficult to decode."
)
SndHeaders.nframes.__doc__ = (
    "The value for frames will be either the number\n"
    "of frames or -1."
)
SndHeaders.sampwidth.__doc__ = (
    "Either the sample size in bits or\n"
    "'A' for A-LAW or 'U' for u-LAW."
)
55
def what(filename):
    """Guess the type of a sound file.

    This is a plain pass-through to whathdr(): whatever that function
    returns for *filename* is returned unchanged.
    """
    return whathdr(filename)
60
61
def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of *filename* and offers them, together with
    the open file object, to each recognizer registered in ``tests`` in
    order.  The first truthy result is wrapped in a SndHeaders tuple; if no
    recognizer claims the file, None is returned.
    """
    with open(filename, 'rb') as stream:
        header = stream.read(512)
        # The generator is lazy, so probing stops at the first recognizer
        # that reports a match.
        outcomes = (probe(header, stream) for probe in tests)
        match = next((found for found in outcomes if found), None)
    if match:
        return SndHeaders(*match)
    return None
71
72
73 #-----------------------------------#
74 # Subroutines per sound header type #
75 #-----------------------------------#
76
# Ordered registry of recognizer functions.  whathdr() calls each entry as
# tf(h, f) in list order and stops at the first truthy result, so the
# registration order below is also the probing order.
tests = []
78
79 def test_aifc(h, f):
80 """AIFC and AIFF files"""
81 with warnings.catch_warnings():
82 warnings.simplefilter('ignore', category=DeprecationWarning)
83 import aifc
84 if not h.startswith(b'FORM'):
85 return None
86 if h[8:12] == b'AIFC':
87 fmt = 'aifc'
88 elif h[8:12] == b'AIFF':
89 fmt = 'aiff'
90 else:
91 return None
92 f.seek(0)
93 try:
94 a = aifc.open(f, 'r')
95 except (EOFError, aifc.Error):
96 return None
97 return (fmt, a.getframerate(), a.getnchannels(),
98 a.getnframes(), 8 * a.getsampwidth())
99
100 tests.append(test_aifc)
101
102
103 def test_au(h, f):
104 """AU and SND files"""
105 if h.startswith(b'.snd'):
106 func = get_long_be
107 elif h[:4] in (b'\0ds.', b'dns.'):
108 func = get_long_le
109 else:
110 return None
111 filetype = 'au'
112 hdr_size = func(h[4:8])
113 data_size = func(h[8:12])
114 encoding = func(h[12:16])
115 rate = func(h[16:20])
116 nchannels = func(h[20:24])
117 sample_size = 1 # default
118 if encoding == 1:
119 sample_bits = 'U'
120 elif encoding == 2:
121 sample_bits = 8
122 elif encoding == 3:
123 sample_bits = 16
124 sample_size = 2
125 else:
126 sample_bits = '?'
127 frame_size = sample_size * nchannels
128 if frame_size:
129 nframe = data_size / frame_size
130 else:
131 nframe = -1
132 return filetype, rate, nchannels, nframe, sample_bits
133
134 tests.append(test_au)
135
136
137 def test_hcom(h, f):
138 """HCOM file"""
139 if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
140 return None
141 divisor = get_long_be(h[144:148])
142 if divisor:
143 rate = 22050 / divisor
144 else:
145 rate = 0
146 return 'hcom', rate, 1, -1, 8
147
148 tests.append(test_hcom)
149
150
151 def test_voc(h, f):
152 """VOC file"""
153 if not h.startswith(b'Creative Voice File\032'):
154 return None
155 sbseek = get_short_le(h[20:22])
156 rate = 0
157 if 0 <= sbseek < 500 and h[sbseek] == 1:
158 ratecode = 256 - h[sbseek+4]
159 if ratecode:
160 rate = int(1000000.0 / ratecode)
161 return 'voc', rate, 1, -1, 8
162
163 tests.append(test_voc)
164
165
166 def test_wav(h, f):
167 """WAV file"""
168 import wave
169 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
170 if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
171 return None
172 f.seek(0)
173 try:
174 w = wave.open(f, 'r')
175 except (EOFError, wave.Error):
176 return None
177 return ('wav', w.getframerate(), w.getnchannels(),
178 w.getnframes(), 8*w.getsampwidth())
179
180 tests.append(test_wav)
181
182
183 def test_8svx(h, f):
184 """8SVX file"""
185 if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
186 return None
187 # Should decode it to get #channels -- assume always 1
188 return '8svx', 0, 1, 0, 8
189
190 tests.append(test_8svx)
191
192
193 def test_sndt(h, f):
194 """SNDT file"""
195 if h.startswith(b'SOUND'):
196 nsamples = get_long_le(h[8:12])
197 rate = get_short_le(h[20:22])
198 return 'sndt', rate, 1, nsamples, 8
199
200 tests.append(test_sndt)
201
202
203 def test_sndr(h, f):
204 """SNDR file"""
205 if h.startswith(b'\0\0'):
206 rate = get_short_le(h[2:4])
207 if 4000 <= rate <= 25000:
208 return 'sndr', rate, 1, -1, 8
209
210 tests.append(test_sndr)
211
212
213 #-------------------------------------------#
214 # Subroutines to extract numbers from bytes #
215 #-------------------------------------------#
216
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant byte first."""
    value = 0
    for index in range(4):
        value = (value << 8) | b[index]
    return value
219
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant byte first."""
    value = 0
    # Walk from the most significant byte (index 3) down to index 0.
    for index in (3, 2, 1, 0):
        value = (value << 8) | b[index]
    return value
222
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, most significant byte first."""
    value = 0
    for index in range(2):
        value = (value << 8) | b[index]
    return value
225
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, least significant byte first."""
    value = 0
    # Index 1 is the high byte, index 0 the low byte.
    for index in (1, 0):
        value = (value << 8) | b[index]
    return value
228
229
230 #--------------------#
231 # Small test program #
232 #--------------------#
233
def test():
    """Command-line driver: run testall() over the paths given in sys.argv.

    A leading '-r' argument is removed from sys.argv and turns on
    recursion.  With no path arguments the current directory '.' is
    examined.  On KeyboardInterrupt a notice is written to stderr and the
    process exits with status 1.
    """
    import sys
    argv = sys.argv
    recursive = 0
    if len(argv) > 1 and argv[1] == '-r':
        # Drop the flag in place so everything left after argv[0] is a path.
        del argv[1:2]
        recursive = 1
    try:
        testall(argv[1:] or ['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
248
249 def testall(list, recursive, toplevel):
250 import sys
251 import os
252 for filename in list:
253 if os.path.isdir(filename):
254 print(filename + '/:', end=' ')
255 if recursive or toplevel:
256 print('recursing down:')
257 import glob
258 names = glob.glob(os.path.join(glob.escape(filename), '*'))
259 testall(names, recursive, 0)
260 else:
261 print('*** directory (use -r) ***')
262 else:
263 print(filename + ':', end=' ')
264 sys.stdout.flush()
265 try:
266 print(what(filename))
267 except OSError:
268 print('*** not found ***')
269
# Run the small test program when this file is executed as a script.
if __name__ == '__main__':
    test()