1 """Conversion pipeline templates.
2
3 The problem:
4 ------------
5
6 Suppose you have some data that you want to convert to another format,
7 such as from GIF image format to PPM image format. Maybe the
8 conversion involves several steps (e.g. piping it through compress or
9 uuencode). Some of the conversion steps may require that their input
10 is a disk file, others may be able to read standard input; similar for
11 their output. The input to the entire conversion may also be read
12 from a disk file or from an open file, and similar for its output.
13
14 The module lets you construct a pipeline template by sticking one or
15 more conversion steps together. It will take care of creating and
16 removing temporary files if they are necessary to hold intermediate
17 data. You can then use the template to do conversions from many
18 different sources to many different destinations. The temporary
19 file names used are different each time the template is used.
20
21 The templates are objects so you can create templates for many
22 different conversion steps and store them in a dictionary, for
23 instance.
24
25
26 Directions:
27 -----------
28
29 To create a template:
30 t = Template()
31
32 To add a conversion step to a template:
33 t.append(command, kind)
34 where kind is a string of two characters: the first is '-' if the
35 command reads its standard input or 'f' if it requires a file; the
36 second likewise for the output. The command must be valid /bin/sh
37 syntax. If input or output files are required, they are passed as
38 $IN and $OUT; otherwise, it must be possible to use the command in
39 a pipeline.
40
41 To add a conversion step at the beginning:
42 t.prepend(command, kind)
43
44 To convert a file to another file using a template:
45 sts = t.copy(infile, outfile)
46 If infile or outfile are the empty string, standard input is read or
47 standard output is written, respectively. The return value is the
48 exit status of the conversion pipeline.
49
50 To open a file for reading or writing through a conversion pipeline:
51 fp = t.open(file, mode)
52 where mode is 'r' to read the file, or 'w' to write it -- just like
53 for the built-in function open() or for os.popen().
54
55 To create a new template object initialized to a given one:
56 t2 = t.clone()
57 """ # '
58
59
60 import re
61 import os
62 import tempfile
63 import warnings
64 # we import the quote function rather than the module for backward compat
65 # (quote used to be an undocumented but used function in pipes)
66 from shlex import quote
67
# Import-time deprecation notice: this module is marked for removal in
# Python 3.13 (the ``remove=(3, 13)`` argument).
# NOTE(review): warnings._deprecated is a private CPython helper; presumably
# it attributes the warning to the importing code via a fixed stack level, so
# keep this call at module top level rather than wrapping it -- confirm.
warnings._deprecated(__name__, remove=(3, 13))

# Template is the only name exported by a star-import of this module.
__all__ = ["Template"]
71
# Conversion step kinds
#
# Each kind is a two-character string: the first character describes how the
# step takes its input, the second how it produces its output.  'f' means a
# real file (passed to the command as $IN / $OUT), '-' means the standard
# stream, and '.' marks the closed end of a SOURCE or SINK step.

FILEIN_FILEOUT = 'ff'   # Must read & write real files
STDIN_FILEOUT = '-f'    # Must write a real file
FILEIN_STDOUT = 'f-'    # Must read a real file
STDIN_STDOUT = '--'     # Normal pipeline element
SOURCE = '.-'           # Must be first, writes stdout
SINK = '-.'             # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
83
84
class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of ``(cmd, kind)`` steps in
    ``self.steps``.  ``cmd`` is a shell command string and ``kind`` is one
    of the two-character step kinds listed in the module-level
    ``stepkinds``.  The template itself never runs anything until
    open(), open_r(), open_w() or copy() is called.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Shallow copy: the step tuples are immutable, so sharing them
        # between the two templates is safe.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    @staticmethod
    def _check_cmd_and_kind(where, cmd, kind):
        """Raise TypeError/ValueError unless cmd is a str and kind is known.

        'where' is the public method name used to prefix the message.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % where)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %r' % (where, kind))

    @staticmethod
    def _check_placeholders(where, cmd, kind):
        """Raise ValueError if a file-based end of kind lacks $IN/$OUT in cmd.

        'where' is the public method name used to prefix the message.
        """
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.%s: missing $IN in cmd' % where)
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.%s: missing $OUT in cmd' % where)

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind is
        unknown, is SOURCE, the template already ends with a SINK step, or
        cmd lacks the $IN/$OUT reference that kind requires.
        """
        self._check_cmd_and_kind('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind is
        unknown, is SINK, the template already begins with a SOURCE step,
        or cmd lacks the $IN/$OUT reference that kind requires.
        """
        self._check_cmd_and_kind('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r'
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) and t.open_w(file) implement
        t.open(file, 'r') and t.open(file, 'w') respectively.

        With no steps the file is opened directly; otherwise the pipeline
        is started with os.popen() reading from file.  Raises ValueError
        if the last step is a SINK, since there would be nothing to read.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise the pipeline
        is started with os.popen() writing to file.  Raises ValueError if
        the first step is a SOURCE, since it would not read what is written.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to outfile
        with os.system() and returns whatever os.system() returns."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command text
        for this template's steps, built by the module-level makepipeline().

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
182
183
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps from infile to outfile.

    infile and outfile are filenames, or '' for standard input / standard
    output.  steps is a sequence of (cmd, kind) pairs, where kind is a
    two-character step kind ('f' = real file, '-' = standard stream).

    Stages that can be piped are joined with ' |\\n'; stages that must
    exchange data through a file are joined with a plain newline and share
    a temporary file.  Temporary files are created here, by
    tempfile.mkstemp(), as soon as the command is built; they are removed
    only by the generated shell text (a trailing 'rm -f' plus a trap), so
    they persist if the returned command is never run.

    Note: when temporary files are used, the trailing 'rm -f' is the last
    command in the returned text, so it determines the shell's exit status.
    """
    # One mutable record per stage:
    #   [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends: a file-based end with no
    # filename gets a plain 'cat' stage to bridge it to stdin/stdout.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect adjacent stages that need files.
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach filenames to each command: $IN/$OUT assignments for file-based
    # ends, shell redirections for stream-based ends that have a filename.
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages.  A stage with no input filename reads from the
    # previous stage through a pipe; otherwise it runs as a separate command
    # that picks up the previous stage's output file.
    parts = [stages[0][1]]
    for inf, cmd, kind, _outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # Group the variable assignment with the command so both
                # sit on the receiving side of the pipe.
                cmd = '{ ' + cmd + '; }'
            parts.append(' |\n' + cmd)
        else:
            parts.append('\n' + cmd)
    cmdlist = ''.join(parts)

    # Remove the temporary files on normal completion and on signals
    # 1 2 3 13 14 15 (HUP, INT, QUIT, PIPE, ALRM, TERM on typical POSIX
    # systems).
    if garbage:
        rmcmd = 'rm -f ' + ' '.join(quote(name) for name in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd

    return cmdlist