commit 9250dd03
diff --git a/createrepo.bash b/createrepo.bash
index 54ac8b2..f5a8bb7 100644
--- a/createrepo.bash
+++ b/createrepo.bash
@@ -1,11 +1,17 @@
# bash completion for createrepo and friends

+_cr_compress_type()
+{
+    COMPREPLY=( $( compgen -W "$( ${1:-createrepo} --compress-type=FOO / 2>&1 \
+        | sed -ne 's/,/ /g' -ne 's/.*[Cc]ompression.*://p' )" -- "$2" ) )
+}
+
_cr_createrepo()
{
    COMPREPLY=()

    case $3 in
-        --version|-h|--help|-u|--baseurl|--distro|--content|--repo|--workers|\
+        --version|-h|--help|-u|--baseurl|--distro|--content|--repo|\
        --revision|-x|--excludes|--changelog-limit|--max-delta-rpm-size)
            return 0
            ;;
@@ -30,10 +36,24 @@ _cr_createrepo()
            COMPREPLY=( $( compgen -f -o plusdirs -X '!*.rpm' -- "$2" ) )
            return 0
            ;;
+        --retain-old-md)
+            COMPREPLY=( $( compgen -W '0 1 2 3 4 5 6 7 8 9' -- "$2" ) )
+            return 0
+            ;;
        --num-deltas)
            COMPREPLY=( $( compgen -W '1 2 3 4 5 6 7 8 9' -- "$2" ) )
            return 0
            ;;
+        --workers)
+            local min=2 max=$( getconf _NPROCESSORS_ONLN 2>/dev/null )
+            [[ -z $max || $max -lt $min ]] && max=$min
+            COMPREPLY=( $( compgen -W "{1..$max}" -- "$2" ) )
+            return 0
+            ;;
+        --compress-type)
+            _cr_compress_type "$1" "$2"
+            return 0
+            ;;
    esac

    if [[ $2 == -* ]] ; then
@@ -42,9 +62,9 @@ _cr_createrepo()
            --cachedir --checkts --no-database --update --update-md-path
            --skip-stat --split --pkglist --includepkg --outputdir
            --skip-symlinks --changelog-limit --unique-md-filenames
-            --simple-md-filenames --distro --content --repo --revision --deltas
-            --oldpackagedirs --num-deltas --read-pkgs-list
-            --max-delta-rpm-size --workers' -- "$2" ) )
+            --simple-md-filenames --retain-old-md --distro --content --repo
+            --revision --deltas --oldpackagedirs --num-deltas --read-pkgs-list
+            --max-delta-rpm-size --workers --compress-type' -- "$2" ) )
    else
        COMPREPLY=( $( compgen -d -- "$2" ) )
    fi
@@ -63,10 +83,14 @@ _cr_mergerepo()
            COMPREPLY=( $( compgen -d -- "$2" ) )
            return 0
            ;;
+        --compress-type)
+            _cr_compress_type "" "$2"
+            return 0
+            ;;
    esac

    COMPREPLY=( $( compgen -W '--version --help --repo --archlist --no-database
-        --outputdir --nogroups --noupdateinfo' -- "$2" ) )
+        --outputdir --nogroups --noupdateinfo --compress-type' -- "$2" ) )
} &&
complete -F _cr_mergerepo -o filenames mergerepo mergerepo.py

@@ -78,17 +102,22 @@ _cr_modifyrepo()
        --version|-h|--help|--mdtype)
            return 0
            ;;
+        --compress-type)
+            _cr_compress_type "" "$2"
+            return 0
+            ;;
    esac

    if [[ $2 == -* ]] ; then
-        COMPREPLY=( $( compgen -W '--version --help --mdtype' -- "$2" ) )
+        COMPREPLY=( $( compgen -W '--version --help --mdtype --remove
+            --compress --compress-type' -- "$2" ) )
        return 0
    fi

    local i argnum=1
    for (( i=1; i < ${#COMP_WORDS[@]}-1; i++ )) ; do
        if [[ ${COMP_WORDS[i]} != -* &&
-              ${COMP_WORDS[i-1]} != @(=|--mdtype) ]]; then
+              ${COMP_WORDS[i-1]} != @(=|--@(md|compress-)type) ]]; then
            argnum=$(( argnum+1 ))
        fi
    done
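
The _cr_compress_type helper discovers the supported compression types at completion time: it invokes the target program with a deliberately bogus value (--compress-type=FOO) and scrapes the valid choices out of the resulting error message. A rough Python 2 sketch of the same trick, assuming a createrepo binary on PATH (the regex approximates the sed program above, it is not the exact one):

    import re
    import subprocess

    def available_compress_types(prog='createrepo'):
        # A bogus --compress-type makes the tool print the valid choices.
        p = subprocess.Popen([prog, '--compress-type=FOO', '/'],
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = p.communicate()[0]
        for line in out.splitlines():
            m = re.search(r'[Cc]ompression.*:(.*)', line)
            if m:
                return [t.strip() for t in m.group(1).split(',') if t.strip()]
        return []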
diff --git a/createrepo.spec b/createrepo.spec
index 1e491cd..eea7092 100644
--- a/createrepo.spec
+++ b/createrepo.spec
@@ -11,7 +11,7 @@ URL: http://createrepo.baseurl.org/
BuildRoot: %{_tmppath}/%{name}-%{version}root
BuildArchitectures: noarch
Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python
-Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm
+Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm, pyliblzma

%description
This utility will generate a common metadata repository from a directory of
@@ -43,6 +43,9 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install
%{python_sitelib}/createrepo

%changelog
+* Fri Sep 9 2011 Seth Vidal <skvidal at fedoraproject.org>
+- add lzma dep
+
* Wed Jan 26 2011 Seth Vidal <skvidal at fedoraproject.org>
- bump to 0.9.9
- add worker.py
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 8f2538e..1b18a9f 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -26,15 +26,16 @@ import tempfile
import stat
import fcntl
import subprocess
+from select import select

-from yum import misc, Errors, to_unicode
-from yum.repoMDObject import RepoMD, RepoMDError, RepoData
+from yum import misc, Errors
+from yum.repoMDObject import RepoMD, RepoData
from yum.sqlutils import executeSQL
from yum.packageSack import MetaSack
-from yum.packages import YumAvailablePackage, YumLocalPackage
+from yum.packages import YumAvailablePackage

import rpmUtils.transaction
-from utils import _, errorprint, MDError
+from utils import _, errorprint, MDError, lzma, _available_compression
import readMetadata
try:
    import sqlite3 as sqlite
@@ -46,8 +47,9 @@ try:
except ImportError:
    pass

-from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \
+from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \
     checksum_and_rename, split_list_into_equal_chunks
+from utils import num_cpus_online
import deltarpms

__version__ = '0.9.9'
@@ -74,7 +76,7 @@ class MetaDataConfig(object):
        self.deltadir = None
        self.delta_relative = 'drpms/'
        self.oldpackage_paths = [] # where to look for the old packages -
-        self.deltafile = 'prestodelta.xml.gz'
+        self.deltafile = 'prestodelta.xml'
        self.num_deltas = 1 # number of older versions to delta (max)
        self.max_delta_rpm_size = 100000000
        self.update_md_path = None
@@ -86,9 +88,9 @@ class MetaDataConfig(object):
        self.skip_symlinks = False
        self.pkglist = []
        self.database_only = False
-        self.primaryfile = 'primary.xml.gz'
-        self.filelistsfile = 'filelists.xml.gz'
-        self.otherfile = 'other.xml.gz'
+        self.primaryfile = 'primary.xml'
+        self.filelistsfile = 'filelists.xml'
+        self.otherfile = 'other.xml'
        self.repomdfile = 'repomd.xml'
        self.tempdir = '.repodata'
        self.finaldir = 'repodata'
@@ -108,8 +110,10 @@ class MetaDataConfig(object):
        self.collapse_glibc_requires = True
        self.workers = 1 # number of workers to fork off to grab metadata from the pkgs
        self.worker_cmd = '/usr/share/createrepo/worker.py'
-
        #self.worker_cmd = './worker.py' # helpful when testing
+        self.retain_old_md = 0
+        self.compress_type = 'compat'
+

class SimpleMDCallBack(object):
    def errorlog(self, thing):
@@ -141,10 +145,23 @@ class MetaDataGenerator:
        self.files = []
        self.rpmlib_reqs = {}
        self.read_pkgs = []
+        self.compat_compress = False

        if not self.conf.directory and not self.conf.directories:
            raise MDError, "No directory given on which to run."
-
+
+        if self.conf.compress_type == 'compat':
+            self.compat_compress = True
+            self.conf.compress_type = None
+
+        if not self.conf.compress_type:
+            self.conf.compress_type = 'gz'
+
+        if self.conf.compress_type not in utils._available_compression:
+            raise MDError, "Compression %s not available: Please choose from: %s" \
+                % (self.conf.compress_type, ', '.join(utils._available_compression))
+
+
        if not self.conf.directories: # just makes things easier later
            self.conf.directories = [self.conf.directory]
        if not self.conf.directory: # ensure we have both in the config object
@@ -290,14 +307,13 @@ class MetaDataGenerator:

        def extension_visitor(filelist, dirname, names):
            for fn in names:
+                fn = os.path.join(dirname, fn)
                if os.path.isdir(fn):
                    continue
                if self.conf.skip_symlinks and os.path.islink(fn):
                    continue
                elif fn[-extlen:].lower() == '%s' % (ext):
-                    relativepath = dirname.replace(startdir, "", 1)
-                    relativepath = relativepath.lstrip("/")
-                    filelist.append(os.path.join(relativepath, fn))
+                    filelist.append(fn[len(startdir):])

        filelist = []
        startdir = directory + '/'
@@ -311,7 +327,7 @@ class MetaDataGenerator:
    def checkTimeStamps(self):
        """check the timestamp of our target dir. If it is not newer than
           the repodata return False, else True"""
-        if self.conf.checkts:
+        if self.conf.checkts and self.conf.mdtimestamp:
            dn = os.path.join(self.conf.basedir, self.conf.directory)
            files = self.getFileList(dn, '.rpm')
            files = self.trimRpms(files)
@@ -410,9 +426,11 @@ class MetaDataGenerator:

    def _setupPrimary(self):
        # setup the primary metadata file
+        # FIXME - make this be conf.compress_type once y-m-p is fixed
+        fpz = self.conf.primaryfile + '.' + 'gz'
        primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                       self.conf.primaryfile)
-        fo = _gzipOpen(primaryfilepath, 'w')
+                                       fpz)
+        fo = compressOpen(primaryfilepath, 'w', 'gz')
        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \
                 ' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' %
@@ -421,9 +439,11 @@ class MetaDataGenerator:

    def _setupFilelists(self):
        # setup the filelist file
+        # FIXME - make this be conf.compress_type once y-m-p is fixed
+        fpz = self.conf.filelistsfile + '.' + 'gz'
        filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                    self.conf.filelistsfile)
-        fo = _gzipOpen(filelistpath, 'w')
+                                    fpz)
+        fo = compressOpen(filelistpath, 'w', 'gz')
        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \
                 ' packages="%s">' % self.pkgcount)
@@ -431,9 +451,11 @@ class MetaDataGenerator:

    def _setupOther(self):
        # setup the other file
+        # FIXME - make this be conf.compress_type once y-m-p is fixed
+        fpz = self.conf.otherfile + '.' + 'gz'
        otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.otherfile)
-        fo = _gzipOpen(otherfilepath, 'w')
+                                     fpz)
+        fo = compressOpen(otherfilepath, 'w', 'gz')
        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \
                 ' packages="%s">' %
@@ -442,9 +464,10 @@ class MetaDataGenerator:

    def _setupDelta(self):
        # setup the other file
+        fpz = self.conf.deltafile + '.' + self.conf.compress_type
        deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.deltafile)
-        fo = _gzipOpen(deltafilepath, 'w')
+                                     fpz)
+        fo = compressOpen(deltafilepath, 'w', self.conf.compress_type)
        fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        fo.write('<prestodelta>\n')
        return fo
@@ -520,6 +543,7 @@ class MetaDataGenerator:
        # go on their merry way

        newpkgs = []
+        keptpkgs = []
        if self.conf.update:
            # if we're in --update mode then only act on the new/changed pkgs
            for pkg in pkglist:
@@ -530,39 +554,13 @@ class MetaDataGenerator:
                old_pkg = pkg
                if pkg.find("://") != -1:
                    old_pkg = os.path.basename(pkg)
-                nodes = self.oldData.getNodes(old_pkg)
-                if nodes is not None: # we have a match in the old metadata
+                old_po = self.oldData.getNodes(old_pkg)
+                if old_po: # we have a match in the old metadata
                    if self.conf.verbose:
                        self.callback.log(_("Using data from old metadata for %s")
                                          % pkg)
-                    (primarynode, filenode, othernode) = nodes
-
-                    for node, outfile in ((primarynode, self.primaryfile),
-                                          (filenode, self.flfile),
-                                          (othernode, self.otherfile)):
-                        if node is None:
-                            break
-
-                        if self.conf.baseurl:
-                            anode = node.children
-                            while anode is not None:
-                                if anode.type != "element":
-                                    anode = anode.next
-                                    continue
-                                if anode.name == "location":
-                                    anode.setProp('xml:base', self.conf.baseurl)
-                                anode = anode.next
-
-                        output = node.serialize('UTF-8', self.conf.pretty)
-                        if output:
-                            outfile.write(output)
-                        else:
-                            if self.conf.verbose:
-                                self.callback.log(_("empty serialize on write to" \
-                                                    "%s in %s") % (outfile, pkg))
-                        outfile.write('\n')
-
-                    self.oldData.freeNodes(pkg)
+                    keptpkgs.append((pkg, old_po))

                #FIXME - if we're in update and we have deltas enabled
                # check the presto data for this pkg and write its info back out
                # to our deltafile
@@ -584,32 +582,45 @@ class MetaDataGenerator:
            po = None
            if isinstance(pkg, YumAvailablePackage):
                po = pkg
-                self.read_pkgs.append(po.localpath)
+                self.read_pkgs.append(po.localPkg())

            # if we're dealing with remote pkgs - pitch it over to doing
            # them one at a time, for now.
            elif pkg.find('://') != -1:
-                po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
+                po = self.read_in_package(pkg, pkgpath=pkgpath, reldir=reldir)
                self.read_pkgs.append(pkg)

            if po:
-                self.primaryfile.write(po.xml_dump_primary_metadata())
-                self.flfile.write(po.xml_dump_filelists_metadata())
-                self.otherfile.write(po.xml_dump_other_metadata(
-                    clog_limit=self.conf.changelog_limit))
+                keptpkgs.append((pkg, po))
                continue

            pkgfiles.append(pkg)
-
-
+
+        keptpkgs.sort(reverse=True)
+        # keptpkgs is a list of (filename, po), pkgfiles is a list of filenames.
+        # Need to write them in sorted(filename) order. We loop over pkgfiles,
+        # inserting keptpkgs in right spots (using the upto argument).
+        def save_keptpkgs(upto):
+            while keptpkgs and (upto is None or keptpkgs[-1][0] < upto):
+                filename, po = keptpkgs.pop()
+                # reset baseurl in the old pkg
+                po.basepath = self.conf.baseurl
+                self.primaryfile.write(po.xml_dump_primary_metadata())
+                self.flfile.write(po.xml_dump_filelists_metadata())
+                self.otherfile.write(po.xml_dump_other_metadata(
+                    clog_limit=self.conf.changelog_limit))
+
        if pkgfiles:
            # divide that list by the number of workers and fork off that many
            # workers to tmpdirs
            # waitfor the workers to finish and as each one comes in
            # open the files they created and write them out to our metadata
            # add up the total pkg counts and return that value
-            worker_tmp_path = tempfile.mkdtemp()
-            worker_chunks = utils.split_list_into_equal_chunks(pkgfiles, self.conf.workers)
+            self._worker_tmp_path = tempfile.mkdtemp() # setting this in the base object so we can clean it up later
+            if self.conf.workers < 1:
+                self.conf.workers = num_cpus_online()
+            pkgfiles.sort()
+            worker_chunks = split_list_into_equal_chunks(pkgfiles, self.conf.workers)
            worker_cmd_dict = {}
            worker_jobs = {}
            base_worker_cmdline = [self.conf.worker_cmd,
@@ -617,7 +628,8 @@ class MetaDataGenerator:
                    '--pkgoptions=_collapse_libc_requires=%s' % self.conf.collapse_glibc_requires,
                    '--pkgoptions=_cachedir=%s' % self.conf.cachedir,
                    '--pkgoptions=_baseurl=%s' % self.conf.baseurl,
-                    '--globalopts=clog_limit=%s' % self.conf.changelog_limit,]
+                    '--globalopts=clog_limit=%s' % self.conf.changelog_limit,
+                    '--globalopts=sumtype=%s' % self.conf.sumtype, ]

            if self.conf.quiet:
                base_worker_cmdline.append('--quiet')
@@ -626,15 +638,14 @@ class MetaDataGenerator:
                base_worker_cmdline.append('--verbose')

            for worker_num in range(self.conf.workers):
-                # make the worker directory
+                pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num
+                f = open(pkl, 'w')
+                f.write('\n'.join(worker_chunks[worker_num]))
+                f.close()
+
                workercmdline = []
                workercmdline.extend(base_worker_cmdline)
-                thisdir = worker_tmp_path + '/' + str(worker_num)
-                if checkAndMakeDir(thisdir):
-                    workercmdline.append('--tmpmdpath=%s' % thisdir)
-                else:
-                    raise MDError, "Unable to create worker path: %s" % thisdir
-                workercmdline.extend(worker_chunks[worker_num])
+                workercmdline.append('--pkglist=%s/pkglist-%s' % (self._worker_tmp_path, worker_num))
                worker_cmd_dict[worker_num] = workercmdline


@@ -647,49 +658,60 @@ class MetaDataGenerator:
                                       stderr=subprocess.PIPE)
                worker_jobs[num] = job

-            gimmebreak = 0
-            while gimmebreak != len(worker_jobs.keys()):
-                gimmebreak = 0
-                for (num,job) in worker_jobs.items():
-                    if job.poll() is not None:
-                        gimmebreak+=1
-                    line = job.stdout.readline()
-                    if line:
+            files = self.primaryfile, self.flfile, self.otherfile
+            def log_messages(num):
+                job = worker_jobs[num]
+                while True:
+                    # check stdout and stderr
+                    for stream in select((job.stdout, job.stderr), (), ())[0]:
+                        line = stream.readline()
+                        if line: break
+                    else:
+                        return # EOF, EOF
+                    if stream is job.stdout:
+                        if line.startswith('*** '):
+                            # get data, save to local files
+                            for out, size in zip(files, line[4:].split()):
+                                out.write(stream.read(int(size)))
+                            return
                        self.callback.log('Worker %s: %s' % (num, line.rstrip()))
-                    line = job.stderr.readline()
-                    if line:
+                    else:
                        self.callback.errorlog('Worker %s: %s' % (num, line.rstrip()))
+
+            for i, pkg in enumerate(pkgfiles):
+                # insert cached packages
+                save_keptpkgs(pkg)
+
+                # save output to local files
+                log_messages(i % self.conf.workers)
+
+            for (num, job) in worker_jobs.items():
+                # process remaining messages on stderr
+                log_messages(num)
+
+                if job.wait() != 0:
+                    msg = "Worker exited with non-zero value: %s. Fatal." % job.returncode
+                    self.callback.errorlog(msg)
+                    raise MDError, msg

-
            if not self.conf.quiet:
                self.callback.log("Workers Finished")
-            # finished with workers
-            # go to their dirs and add the contents
-            if not self.conf.quiet:
-                self.callback.log("Gathering worker results")
-            for num in range(self.conf.workers):
-                for (fn, fo) in (('primary.xml', self.primaryfile),
-                                 ('filelists.xml', self.flfile),
-                                 ('other.xml', self.otherfile)):
-                    fnpath = worker_tmp_path + '/' + str(num) + '/' + fn
-                    if os.path.exists(fnpath):
-                        fo.write(open(fnpath, 'r').read())
-

            for pkgfile in pkgfiles:
                if self.conf.deltas:
-                    po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
-                    self._do_delta_rpm_package(po)
+                    try:
+                        po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
+                        self._do_delta_rpm_package(po)
+                    except MDError, e:
+                        errorprint(e)
+                        continue
                self.read_pkgs.append(pkgfile)

+        save_keptpkgs(None) # append anything left
        return self.current_pkg


    def closeMetadataDocs(self):
-        if not self.conf.quiet:
-            self.callback.log('')
-
-
        # save them up to the tmp locations:
        if not self.conf.quiet:
            self.callback.log(_('Saving Primary metadata'))
@@ -784,7 +806,6 @@ class MetaDataGenerator:
            return self._old_package_dict

        self._old_package_dict = {}
-        opl = []
        for d in self.conf.oldpackage_paths:
            for f in self.getFileList(d, '.rpm'):
                fp = d + '/' + f
@@ -833,7 +854,7 @@ class MetaDataGenerator:
        return ' '.join(results)

    def _createRepoDataObject(self, mdfile, mdtype, compress=True,
-                              compress_type='gzip', attribs={}):
+                              compress_type=None, attribs={}):
        """return random metadata as RepoData object to be added to RepoMD
           mdfile = complete path to file
           mdtype = the metadata type to use
@@ -843,15 +864,13 @@ class MetaDataGenerator:
        sfile = os.path.basename(mdfile)
        fo = open(mdfile, 'r')
        outdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        if not compress_type:
+            compress_type = self.conf.compress_type
        if compress:
-            if compress_type == 'gzip':
-                sfile = '%s.gz' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = GzipFile(filename = outfn, mode='wb')
-            elif compress_type == 'bzip2':
-                sfile = '%s.bz2' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = BZ2File(filename = outfn, mode='wb')
+            sfile = '%s.%s' % (sfile, compress_type)
+            outfn = os.path.join(outdir, sfile)
+            output = compressOpen(outfn, mode='wb', compress_type=compress_type)
+
        else:
            outfn = os.path.join(outdir, sfile)
            output = open(outfn, 'w')
@@ -874,14 +893,13 @@ class MetaDataGenerator:

        thisdata = RepoData()
        thisdata.type = mdtype
-        baseloc = None
        thisdata.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, sfile))
        thisdata.checksum = (self.conf.sumtype, csum)
        if compress:
            thisdata.openchecksum = (self.conf.sumtype, open_csum)

        thisdata.size = str(os.stat(outfn).st_size)
-        thisdata.timestamp = str(os.stat(outfn).st_mtime)
+        thisdata.timestamp = str(int(os.stat(outfn).st_mtime))
        for (k, v) in attribs.items():
            setattr(thisdata, k, str(v))

@@ -925,9 +943,14 @@ class MetaDataGenerator:
            rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None)

        for (rpm_file, ftype) in workfiles:
+            # when we fix y-m-p and non-gzipped xml files - then we can make this just add
+            # self.conf.compress_type
+            if ftype in ('other', 'filelists', 'primary'):
+                rpm_file = rpm_file + '.' + 'gz'
+            elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression:
+                rpm_file = rpm_file + '.' + self.conf.compress_type
            complete_path = os.path.join(repopath, rpm_file)
-
-            zfo = _gzipOpen(complete_path)
+            zfo = compressOpen(complete_path)
            # This is misc.checksum() done locally so we can get the size too.
            data = misc.Checksums([sumtype])
            while data.read(zfo, 2**16):
@@ -966,14 +989,20 @@ class MetaDataGenerator:
                good_name = '%s.sqlite' % ftype
                resultpath = os.path.join(repopath, good_name)

+                # compat compression for rhel5 compatibility from fedora :(
+                compress_type = self.conf.compress_type
+                if self.compat_compress:
+                    compress_type = 'bz2'
+
                # rename from silly name to not silly name
                os.rename(tmp_result_path, resultpath)
-                compressed_name = '%s.bz2' % good_name
+                compressed_name = '%s.%s' % (good_name, compress_type)
                result_compressed = os.path.join(repopath, compressed_name)
                db_csums[ftype] = misc.checksum(sumtype, resultpath)

                # compress the files
-                bzipFile(resultpath, result_compressed)
+
+                compressFile(resultpath, result_compressed, compress_type)
                # csum the compressed file
                db_compressed_sums[ftype] = misc.checksum(sumtype,
                                                          result_compressed)
@@ -983,8 +1012,8 @@ class MetaDataGenerator:
                    os.unlink(resultpath)

                if self.conf.unique_md_filenames:
-                    csum_compressed_name = '%s-%s.bz2' % (
-                        db_compressed_sums[ftype], good_name)
+                    csum_compressed_name = '%s-%s.%s' % (
+                        db_compressed_sums[ftype], good_name, compress_type)
                    csum_result_compressed = os.path.join(repopath,
                                                          csum_compressed_name)
                    os.rename(result_compressed, csum_result_compressed)
@@ -1001,7 +1030,7 @@ class MetaDataGenerator:
                data.location = (self.conf.baseurl,
                                 os.path.join(self.conf.finaldir, compressed_name))
                data.checksum = (sumtype, db_compressed_sums[ftype])
-                data.timestamp = str(db_stat.st_mtime)
+                data.timestamp = str(int(db_stat.st_mtime))
                data.size = str(db_stat.st_size)
                data.opensize = str(un_stat.st_size)
                data.openchecksum = (sumtype, db_csums[ftype])
@@ -1020,7 +1049,13 @@ class MetaDataGenerator:
                data.openchecksum = (sumtype, uncsum)

            if self.conf.unique_md_filenames:
-                res_file = '%s-%s.xml.gz' % (csum, ftype)
+                if ftype in ('primary', 'filelists', 'other'):
+                    compress = 'gz'
+                else:
+                    compress = self.conf.compress_type
+
+                main_name = '.'.join(rpm_file.split('.')[:-1])
+                res_file = '%s-%s.%s' % (csum, main_name, compress)
                orig_file = os.path.join(repopath, rpm_file)
                dest_file = os.path.join(repopath, res_file)
                os.rename(orig_file, dest_file)
@@ -1046,7 +1081,7 @@ class MetaDataGenerator:


        if self.conf.additional_metadata:
-            for md_type, mdfile in self.conf.additional_metadata.items():
+            for md_type, md_file in self.conf.additional_metadata.items():
                mdcontent = self._createRepoDataObject(md_file, md_type)
                repomd.repoData[mdcontent.type] = mdcontent

@@ -1110,23 +1145,43 @@ class MetaDataGenerator:
            raise MDError, _(
                'Could not remove old metadata file: %s: %s') % (oldfile, e)

-        # Move everything else back from olddir (eg. repoview files)
-        try:
-            old_contents = os.listdir(output_old_dir)
-        except (OSError, IOError), e:
-            old_contents = []
-
+        old_to_remove = []
+        old_pr = []
+        old_fl = []
+        old_ot = []
+        old_pr_db = []
+        old_fl_db = []
+        old_ot_db = []
        for f in os.listdir(output_old_dir):
            oldfile = os.path.join(output_old_dir, f)
            finalfile = os.path.join(output_final_dir, f)
-            if f.find('-') != -1 and f.split('-')[1] in ('primary.sqlite.bz2',
-                    'filelists.sqlite.bz2', 'primary.xml.gz','other.sqlite.bz2',
-                    'other.xml.gz','filelists.xml.gz'):
-                os.remove(oldfile) # kill off the old ones
-                continue
-            if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2',
-                     'primary.sqlite.bz2'):
-                os.remove(oldfile)
+
+            for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr),
+                              ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl),
+                              ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)):
+                fn = '.'.join(f.split('.')[:-1])
+                if fn.endswith(end):
+                    lst.append(oldfile)
+                    break
+
+        # make a list of the old metadata files we don't want to remove.
+        for lst in (old_pr, old_fl, old_ot, old_pr_db, old_fl_db, old_ot_db):
+            sortlst = sorted(lst, key=lambda x: os.path.getmtime(x),
+                             reverse=True)
+            for thisf in sortlst[self.conf.retain_old_md:]:
+                old_to_remove.append(thisf)
+
+        for f in os.listdir(output_old_dir):
+            oldfile = os.path.join(output_old_dir, f)
+            finalfile = os.path.join(output_final_dir, f)
+            fn = '.'.join(f.split('.')[:-1])
+            if fn in ('filelists.sqlite', 'other.sqlite',
+                      'primary.sqlite') or oldfile in old_to_remove:
+                try:
+                    os.remove(oldfile)
+                except (OSError, IOError), e:
+                    raise MDError, _(
+                        'Could not remove old metadata file: %s: %s') % (oldfile, e)
                continue

            if os.path.exists(finalfile):
@@ -1147,14 +1202,19 @@ class MetaDataGenerator:
                msg += _('Error was %s') % e
                raise MDError, msg

-        try:
-            os.rmdir(output_old_dir)
-        except OSError, e:
-            self.errorlog(_('Could not remove old metadata dir: %s')
-                          % self.conf.olddir)
-            self.errorlog(_('Error was %s') % e)
-            self.errorlog(_('Please clean up this directory manually.'))
+        self._cleanup_tmp_repodata_dir()
+        self._cleanup_update_tmp_dir()
+        self._write_out_read_pkgs_list()
+

+    def _cleanup_update_tmp_dir(self):
+        if not self.conf.update:
+            return
+
+        shutil.rmtree(self.oldData._repo.basecachedir, ignore_errors=True)
+        shutil.rmtree(self.oldData._repo.base_persistdir, ignore_errors=True)
+
+    def _write_out_read_pkgs_list(self):
        # write out the read_pkgs_list file with self.read_pkgs
        if self.conf.read_pkgs_list:
            try:
@@ -1167,6 +1227,23 @@ class MetaDataGenerator:
                              % self.conf.read_pkgs_list)
                self.errorlog(_('Error was %s') % e)

+    def _cleanup_tmp_repodata_dir(self):
+        output_old_dir = os.path.join(self.conf.outputdir, self.conf.olddir)
+        output_temp_dir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        for dirbase in (self.conf.olddir, self.conf.tempdir):
+            dirpath = os.path.join(self.conf.outputdir, dirbase)
+            if os.path.exists(dirpath):
+                try:
+                    os.rmdir(dirpath)
+                except OSError, e:
+                    self.errorlog(_('Could not remove temp metadata dir: %s')
+                                  % dirbase)
+                    self.errorlog(_('Error was %s') % e)
+                    self.errorlog(_('Please clean up this directory manually.'))
+        # our worker tmp path
+        if hasattr(self, '_worker_tmp_path') and os.path.exists(self._worker_tmp_path):
+            shutil.rmtree(self._worker_tmp_path, ignore_errors=True)
+
    def setup_sqlite_dbs(self, initdb=True):
        """sets up the sqlite dbs w/table schemas and db_infos"""
        destdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
@@ -1194,24 +1271,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
        (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
        return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))

-    def getFileList(self, directory, ext):
-
-        extlen = len(ext)
-
-        def extension_visitor(arg, dirname, names):
-            for fn in names:
-                if os.path.isdir(fn):
-                    continue
-                elif fn[-extlen:].lower() == '%s' % (ext):
-                    reldir = os.path.basename(dirname)
-                    if reldir == os.path.basename(directory):
-                        reldir = ""
-                    arg.append(os.path.join(reldir, fn))
-
-        rpmlist = []
-        os.path.walk(directory, extension_visitor, rpmlist)
-        return rpmlist
-
    def doPkgMetadata(self):
        """all the heavy lifting for the package metadata"""
        if len(self.conf.directories) == 1:
@@ -1232,6 +1291,19 @@ class SplitMetaDataGenerator(MetaDataGenerator):
            thisdir = os.path.join(self.conf.basedir, mydir)

            filematrix[mydir] = self.getFileList(thisdir, '.rpm')
+
+            # pkglist is a bit different for split media, as we have to know
+            # which dir. it belongs to. So we walk the dir. and then filter.
+            # We could be faster by not walking the dir. ... but meh.
+            if self.conf.pkglist:
+                pkglist = set(self.conf.pkglist)
+                pkgs = []
+                for fname in filematrix[mydir]:
+                    if fname not in pkglist:
+                        continue
+                    pkgs.append(fname)
+                filematrix[mydir] = pkgs
+
            self.trimRpms(filematrix[mydir])
            self.pkgcount += len(filematrix[mydir])

@@ -1240,7 +1312,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
        self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
        try:
            self.openMetadataDocs()
-                original_basedir = self.conf.basedir
            for mydir in self.conf.directories:
                self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
                self.writeMetadataDocs(filematrix[mydir], mydir)
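
The reworked worker loop above replaces per-worker temp directories with a small framing protocol on each worker's stdout: ordinary lines are log output, and a sentinel line of the form "*** <primary-size> <filelists-size> <other-size>" announces that the three metadata payloads follow back to back, which the parent splices directly into its open primary/filelists/other files. A minimal Python 2 sketch of the reader side, assuming a file-like stream positioned at such a sentinel line (the function name is illustrative):

    def read_framed_metadata(stream):
        """Read one '*** <s1> <s2> <s3>' frame; return the three payloads."""
        header = stream.readline()
        if not header.startswith('*** '):
            raise ValueError('not a metadata frame: %r' % header)
        sizes = [int(s) for s in header[4:].split()]
        # payloads follow the header back to back:
        # primary, then filelists, then other
        return [stream.read(size) for size in sizes]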
diff --git a/createrepo/merge.py b/createrepo/merge.py
index b3b2ea1..1ac43bb 100644
--- a/createrepo/merge.py
+++ b/createrepo/merge.py
@@ -24,6 +24,7 @@ from yum.misc import unique, getCacheDir
import yum.update_md
import rpmUtils.arch
import operator
+from utils import MDError
import createrepo
import tempfile

@@ -84,6 +85,8 @@ class RepoMergeBase:
        # in the repolist
        count = 0
        for r in self.repolist:
+            if r[0] == '/':
+                r = 'file://' + r # just fix the file repos, this is silly.
            count +=1
            rid = 'repo%s' % count
            n = self.yumbase.add_enable_repo(rid, baseurls=[r],
@@ -92,7 +95,10 @@ class RepoMergeBase:
            n._merge_rank = count

        #setup our sacks
-        self.yumbase._getSacks(archlist=self.archlist)
+        try:
+            self.yumbase._getSacks(archlist=self.archlist)
+        except yum.Errors.RepoError, e:
+            raise MDError, "Could not setup merge repo pkgsack: %s" % e

        myrepos = self.yumbase.repos.listEnabled()

@@ -102,11 +108,16 @@ class RepoMergeBase:
    def write_metadata(self, outputdir=None):
        mytempdir = tempfile.mkdtemp()
        if self.groups:
-            comps_fn = mytempdir + '/groups.xml'
-            compsfile = open(comps_fn, 'w')
-            compsfile.write(self.yumbase.comps.xml())
-            compsfile.close()
-            self.mdconf.groupfile=comps_fn
+            try:
+                comps_fn = mytempdir + '/groups.xml'
+                compsfile = open(comps_fn, 'w')
+                compsfile.write(self.yumbase.comps.xml())
+                compsfile.close()
+            except yum.Errors.GroupsError, e:
+                # groups not being available shouldn't be a fatal error
+                pass
+            else:
+                self.mdconf.groupfile=comps_fn

        if self.updateinfo:
            ui_fn = mytempdir + '/updateinfo.xml'
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
index 27d3690..54863cb 100644
--- a/createrepo/readMetadata.py
+++ b/createrepo/readMetadata.py
@@ -16,11 +16,25 @@
# Copyright 2006 Red Hat

import os
-import libxml2
import stat
from utils import errorprint, _

-from yum import repoMDObject
+import yum
+from yum import misc
+from yum.Errors import YumBaseError
+import tempfile
+class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite):
+    # special for special people like us.
+    def _return_remote_location(self):
+
+        if self.basepath:
+            msg = """<location xml:base="%s" href="%s"/>\n""" % (
+                misc.to_xml(self.basepath, attrib=True),
+                misc.to_xml(self.relativepath, attrib=True))
+        else:
+            msg = """<location href="%s"/>\n""" % misc.to_xml(self.relativepath, attrib=True)
+
+        return msg


class MetadataIndex(object):
@@ -30,178 +44,72 @@ class MetadataIndex(object):
            opts = {}
        self.opts = opts
        self.outputdir = outputdir
+        realpath = os.path.realpath(outputdir)
        repodatadir = self.outputdir + '/repodata'
-        myrepomdxml = repodatadir + '/repomd.xml'
-        if os.path.exists(myrepomdxml):
-            repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
-            b = repomd.getData('primary').location[1]
-            f = repomd.getData('filelists').location[1]
-            o = repomd.getData('other').location[1]
-            basefile = os.path.join(self.outputdir, b)
-            filelistfile = os.path.join(self.outputdir, f)
-            otherfile = os.path.join(self.outputdir, o)
-        else:
-            basefile = filelistfile = otherfile = ""
-
-        self.files = {'base' : basefile,
-                      'filelist' : filelistfile,
-                      'other' : otherfile}
-        self.scan()
+        self._repo = yum.yumRepo.YumRepository('garbageid')
+        self._repo.baseurl = 'file://' + realpath
+        self._repo.basecachedir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo")
+        self._repo.base_persistdir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo-p")
+        self._repo.metadata_expire = 1
+        self._repo.gpgcheck = 0
+        self._repo.repo_gpgcheck = 0
+        self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld)
+        self.pkg_tups_by_path = {}
+        try:
+            self.scan()
+        except YumBaseError, e:
+            print "Could not find valid repo at: %s" % self.outputdir
+

    def scan(self):
-        """Read in and index old repo data"""
-        self.basenodes = {}
-        self.filesnodes = {}
-        self.othernodes = {}
-        self.pkg_ids = {}
+        """Read in old repodata"""
        if self.opts.get('verbose'):
            print _("Scanning old repo data")
-        for fn in self.files.values():
-            if not os.path.exists(fn):
-                #cannot scan
-                errorprint(_("Warning: Old repodata file missing: %s") % fn)
-                return
-        root = libxml2.parseFile(self.files['base']).getRootElement()
-        self._scanPackageNodes(root, self._handleBase)
-        if self.opts.get('verbose'):
-            print _("Indexed %i base nodes" % len(self.basenodes))
-        root = libxml2.parseFile(self.files['filelist']).getRootElement()
-        self._scanPackageNodes(root, self._handleFiles)
-        if self.opts.get('verbose'):
-            print _("Indexed %i filelist nodes" % len(self.filesnodes))
-        root = libxml2.parseFile(self.files['other']).getRootElement()
-        self._scanPackageNodes(root, self._handleOther)
-        if self.opts.get('verbose'):
-            print _("Indexed %i other nodes" % len(self.othernodes))
-        #reverse index pkg ids to track references
-        self.pkgrefs = {}
-        for relpath, pkgid in self.pkg_ids.iteritems():
-            self.pkgrefs.setdefault(pkgid,[]).append(relpath)
-
-    def _scanPackageNodes(self, root, handler):
-        node = root.children
-        while node is not None:
-            if node.type != "element":
-                node = node.next
+        self._repo.sack.populate(self._repo, 'all', None, False)
+        for thispo in self._repo.sack:
+            mtime = thispo.filetime
+            size = thispo.size
+            relpath = thispo.relativepath
+            do_stat = self.opts.get('do_stat', True)
+            if mtime is None:
+                print _("mtime missing for %s") % relpath
                continue
-            if node.name == "package":
-                handler(node)
-            node = node.next
-
-    def _handleBase(self, node):
-        top = node
-        node = node.children
-        pkgid = None
-        mtime = None
-        size = None
-        relpath = None
-        do_stat = self.opts.get('do_stat', True)
-        while node is not None:
-            if node.type != "element":
-                node = node.next
+            if size is None:
+                print _("size missing for %s") % relpath
                continue
-            if node.name == "checksum":
-                pkgid = node.content
-            elif node.name == "time":
-                mtime = int(node.prop('file'))
-            elif node.name == "size":
-                size = int(node.prop('package'))
-            elif node.name == "location":
-                relpath = node.prop('href')
-            node = node.next
-        if relpath is None:
-            print _("Incomplete data for node")
-            return
-        if pkgid is None:
-            print _("pkgid missing for %s") % relpath
-            return
-        if mtime is None:
-            print _("mtime missing for %s") % relpath
-            return
-        if size is None:
-            print _("size missing for %s") % relpath
-            return
-        if do_stat:
-            filepath = os.path.join(self.opts['pkgdir'], relpath)
-            try:
-                st = os.stat(filepath)
-            except OSError:
-                #file missing -- ignore
-                return
-            if not stat.S_ISREG(st.st_mode):
-                #ignore non files
-                return
-            #check size and mtime
-            if st.st_size != size:
-                if self.opts.get('verbose'):
-                    print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
-                return
-            if int(st.st_mtime) != mtime:
-                if self.opts.get('verbose'):
-                    print _("Modification time changed for %s") % filepath
-                return
-        #otherwise we index
-        self.basenodes[relpath] = top
-        self.pkg_ids[relpath] = pkgid
-
-    def _handleFiles(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.filesnodes[pkgid] = node
-
-    def _handleOther(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.othernodes[pkgid] = node
+            if do_stat:
+                filepath = os.path.join(self.opts['pkgdir'], relpath)
+                try:
+                    st = os.stat(filepath)
+                except OSError:
+                    #file missing -- ignore
+                    continue
+                if not stat.S_ISREG(st.st_mode):
+                    #ignore non files
+                    continue
+                #check size and mtime
+                if st.st_size != size:
+                    if self.opts.get('verbose'):
+                        print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
+                    continue
+                if int(st.st_mtime) != mtime:
+                    if self.opts.get('verbose'):
+                        print _("Modification time changed for %s") % filepath
+                    continue
+
+            self.pkg_tups_by_path[relpath] = thispo.pkgtup
+

-    def getNodes(self, relpath):
-        """Return base, filelist, and other nodes for file, if they exist

-        Returns a tuple of nodes, or None if not found
+    def getNodes(self, relpath):
+        """return a package object based on relative path of pkg
        """
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            return None
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        fnode = self.filesnodes.get(pkgid,None)
-        if fnode is None:
-            return None
-        onode = self.othernodes.get(pkgid,None)
-        if onode is None:
-            return None
-        return bnode, fnode, onode
-
-    def freeNodes(self,relpath):
-        #causing problems
-        """Free up nodes corresponding to file, if possible"""
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            print "Missing node for %s" % relpath
-            return
-        bnode.unlinkNode()
-        bnode.freeNode()
-        del self.basenodes[relpath]
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        del self.pkg_ids[relpath]
-        dups = self.pkgrefs.get(pkgid)
-        dups.remove(relpath)
-        if len(dups):
-            #still referenced
-            return
-        del self.pkgrefs[pkgid]
-        for nodes in self.filesnodes, self.othernodes:
-            node = nodes.get(pkgid)
-            if node is not None:
-                node.unlinkNode()
-                node.freeNode()
-                del nodes[pkgid]
+        if relpath in self.pkg_tups_by_path:
+            pkgtup = self.pkg_tups_by_path[relpath]
+            return self._repo.sack.searchPkgTuple(pkgtup)[0]
+        return None

+

if __name__ == "__main__":
    cwd = os.getcwd()
@@ -209,9 +117,9 @@ if __name__ == "__main__":
            'pkgdir': cwd}

    idx = MetadataIndex(cwd, opts)
-    for fn in idx.basenodes.keys():
-        a,b,c, = idx.getNodes(fn)
-        a.serialize()
-        b.serialize()
-        c.serialize()
-        idx.freeNodes(fn)
+    for fn in idx.pkg_tups_by_path:
+        po = idx.getNodes(fn)
+        print po.xml_dump_primary_metadata()
+        print po.xml_dump_filelists_metadata()
+        print po.xml_dump_other_metadata()
+
diff --git a/createrepo/utils.py b/createrepo/utils.py
index 995c3b9..b0d92ec 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -23,6 +23,12 @@ import bz2
import gzip
from gzip import write32u, FNAME
from yum import misc
+_available_compression = ['gz', 'bz2']
+try:
+    import lzma
+    _available_compression.append('xz')
+except ImportError:
+    lzma = None

def errorprint(stuff):
    print >> sys.stderr, stuff
@@ -34,22 +40,14 @@ def _(args):

class GzipFile(gzip.GzipFile):
    def _write_gzip_header(self):
+        # Generate a header that is easily reproduced with gzip -9 -n on
+        # a unix-like system
        self.fileobj.write('\037\213') # magic header
        self.fileobj.write('\010') # compression method
-        if hasattr(self, 'name'):
-            fname = self.name[:-3]
-        else:
-            fname = self.filename[:-3]
-        flags = 0
-        if fname:
-            flags = FNAME
-        self.fileobj.write(chr(flags))
-        write32u(self.fileobj, long(0))
-        self.fileobj.write('\002')
-        self.fileobj.write('\377')
-        if fname:
-            self.fileobj.write(fname + '\000')
-
+        self.fileobj.write('\000') # flags
+        write32u(self.fileobj, long(0)) # timestamp
+        self.fileobj.write('\002') # max compression
+        self.fileobj.write('\003') # UNIX

def _gzipOpen(filename, mode="rb", compresslevel=9):
    return GzipFile(filename, mode, compresslevel)
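
The rewritten _write_gzip_header pins every variable header field: no embedded filename (flags 0), a zero mtime, XFL 2 (max compression) and OS byte 3 (UNIX). Two runs over identical input therefore produce byte-identical .gz files, matching the output of gzip -9 -n. A sketch of the resulting fixed 10-byte header (illustrative, not code from the patch):

    import struct

    # \x1f\x8b  magic                \x08  deflate
    # \x00      flags (no FNAME, so no filename is embedded)
    # 4 bytes   mtime, pinned to 0
    # \x02      XFL: max compression
    # \x03      OS: UNIX
    fixed_header = '\x1f\x8b\x08\x00' + struct.pack('<I', 0) + '\x02\x03'
    assert len(fixed_header) == 10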
@@ -69,6 +67,75 @@ def bzipFile(source, dest):
    s_fn.close()


+def xzFile(source, dest):
+    if not 'xz' in _available_compression:
+        raise MDError, "Cannot use xz for compression, library/module is not available"
+
+    s_fn = open(source, 'rb')
+    destination = lzma.LZMAFile(dest, 'w')
+
+    while True:
+        data = s_fn.read(1024000)
+
+        if not data: break
+        destination.write(data)
+
+    destination.close()
+    s_fn.close()
+
+def gzFile(source, dest):
+
+    s_fn = open(source, 'rb')
+    destination = GzipFile(dest, 'w')
+
+    while True:
+        data = s_fn.read(1024000)
+
+        if not data: break
+        destination.write(data)
+
+    destination.close()
+    s_fn.close()
+
+
+class Duck:
+    def __init__(self, **attr):
+        self.__dict__ = attr
+
+
+def compressFile(source, dest, compress_type):
+    """Compress an existing file using any compression type from source to dest"""
+
+    if compress_type == 'xz':
+        xzFile(source, dest)
+    elif compress_type == 'bz2':
+        bzipFile(source, dest)
+    elif compress_type == 'gz':
+        gzFile(source, dest)
+    else:
+        raise MDError, "Unknown compression type %s" % compress_type
+
+def compressOpen(fn, mode='rb', compress_type=None):
+
+    if not compress_type:
+        # we are readonly and we don't give a compress_type - then guess based on the file extension
+        compress_type = fn.split('.')[-1]
+        if compress_type not in _available_compression:
+            compress_type = 'gz'
+
+    if compress_type == 'xz':
+        fh = lzma.LZMAFile(fn, mode)
+        if mode == 'w':
+            fh = Duck(write=lambda s, write=fh.write: s != '' and write(s),
+                      close=fh.close)
+        return fh
+    elif compress_type == 'bz2':
+        return bz2.BZ2File(fn, mode)
+    elif compress_type == 'gz':
+        return _gzipOpen(fn, mode)
+    else:
+        raise MDError, "Unknown compression type %s" % compress_type
+
def returnFD(filename):
    try:
        fdno = os.open(filename, os.O_RDONLY)
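
With these helpers, callers select compression by short name instead of hard-coding a file class, and compressOpen guesses the type from the file extension when no type is given. The Duck wrapper exists because LZMAFile objects from pyliblzma can misbehave on zero-length writes, so empty strings are filtered out on the write path. A small Python 2 usage sketch (the path is illustrative):

    # write xz-compressed metadata, then read it back, letting the
    # extension pick the decompressor on the read side
    fo = compressOpen('/tmp/example.xml.xz', 'w', compress_type='xz')
    fo.write('<metadata/>\n')
    fo.write('')   # harmless: the Duck wrapper drops empty writes
    fo.close()

    fo = compressOpen('/tmp/example.xml.xz')   # type guessed from '.xz'
    print fo.read()
    fo.close()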
@@ -124,15 +191,28 @@ def encodefiletypelist(filetypelist):
    return result

def split_list_into_equal_chunks(seq, num_chunks):
-    avg = len(seq) / float(num_chunks)
-    out = []
-    last = 0.0
-    while last < len(seq):
-        out.append(seq[int(last):int(last + avg)])
-        last += avg
-
+    """it's used on sorted input which is then merged in order"""
+    out = [[] for i in range(num_chunks)]
+    for i, item in enumerate(seq):
+        out[i % num_chunks].append(item)
    return out

+def num_cpus_online(unknown=1):
+    if not hasattr(os, "sysconf"):
+        return unknown
+
+    if not os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
+        return unknown
+
+    ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
+    try:
+        if int(ncpus) > 0:
+            return ncpus
+    except:
+        pass
+
+    return unknown
+

class MDError(Exception):
    def __init__(self, value=None):
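
The new split_list_into_equal_chunks deals items out round-robin instead of slicing, which is what lets the parent read worker output back in global order: with pkgfiles sorted, worker i % num_chunks holds the i-th file, so cycling across the workers' streams (log_messages(i % self.conf.workers) in __init__.py above) reconstructs the sorted sequence. A short demonstration with illustrative values:

    seq = sorted(['a.rpm', 'b.rpm', 'c.rpm', 'd.rpm', 'e.rpm'])
    chunks = split_list_into_equal_chunks(seq, 2)
    # chunks == [['a.rpm', 'c.rpm', 'e.rpm'], ['b.rpm', 'd.rpm']]
    # reading chunk i % 2, item i // 2 recovers the sorted order
    for i in range(len(seq)):
        assert chunks[i % 2][i // 2] == seq[i]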
diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
index ac06196..f87ac6d 100644
--- a/createrepo/yumbased.py
+++ b/createrepo/yumbased.py
@@ -16,6 +16,11 @@


import os
+def _get_umask():
+    oumask = os.umask(0)
+    os.umask(oumask)
+    return oumask
+_b4rpm_oumask = _get_umask()
import rpm
import types

@@ -86,6 +91,9 @@ class CreateRepoPackage(YumLocalPackage):
            csumo = os.fdopen(csumo, 'w', -1)
            csumo.write(checksum)
            csumo.close()
+            # tempfile forces 002 ... we want to undo that, so that users
+            # can share the cache. BZ 833350.
+            os.chmod(tmpfilename, 0666 ^ _b4rpm_oumask)
            os.rename(tmpfilename, csumfile)
        except:
            pass
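
A note on the chmod line: a process cannot read its umask without setting it, so _get_umask sets and immediately restores it, once at import time (before the rpm module loads, hence the _b4rpm_ prefix). For the usual umasks that only clear read/write bits (022, 002), 0666 ^ umask yields the mode a plain open() would have produced, undoing tempfile's restrictive default. A quick Python 2 illustration (assumption: the umask has no execute bits, otherwise XOR and bit-clearing differ):

    import os

    def _get_umask():
        # the only way to read the umask is to set it: set, then restore
        oumask = os.umask(0)
        os.umask(oumask)
        return oumask

    # with the common umask 022: 0666 ^ 022 == 0644 (rw-r--r--)
    print oct(0666 ^ 022)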
diff --git a/docs/createrepo.8 b/docs/createrepo.8
index e3c4c3b..ff359de 100644
--- a/docs/createrepo.8
+++ b/docs/createrepo.8
@@ -53,7 +53,8 @@ gullible).
Don't generate repo metadata, if their timestamps are newer than its rpms.
This option decreases the processing time drastically again, if you happen
to run it on an unmodified repo, but it is (currently) mutual exclusive
-with the --split option.
+with the --split option. NOTE: This command will not notice when
+packages have been removed from repo. Use --update to handle that.
.br
.IP "\fB\--split\fP"
Run in split media mode. Rather than pass a single directory, take a set of
@@ -104,7 +105,16 @@ Tells createrepo to generate deltarpms and the delta metadata
paths to look for older pkgs to delta against. Can be specified multiple times
.IP "\fB\--num-deltas\fP int"
the number of older versions to make deltas against. Defaults to 1
-
+.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST"
+output the paths to the pkgs actually read; useful with --update
+.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE"
+maximum size of an rpm to run deltarpm against (in bytes)
+.IP "\fB\--workers\fP WORKERS"
+number of workers to spawn to read rpms
+.IP "\fB\--compress-type\fP"
+specify which compression method to use: compat (default),
+xz (may not be available), gz, bz2.
+.IP

.SH "EXAMPLES"
Here is an example of a repository with a groups file. Note that the
1386 | diff --git a/genpkgmetadata.py b/genpkgmetadata.py | |
1387 | index 8c98191..c46e441 100755 | |
1388 | --- a/genpkgmetadata.py | |
1389 | +++ b/genpkgmetadata.py | |
1390 | @@ -37,6 +37,12 @@ def parse_args(args, conf): | |
1391 | Sanity check all the things being passed in. | |
1392 | """ | |
1393 | ||
1394 | + def_workers = os.nice(0) | |
1395 | + if def_workers > 0: | |
1396 | + def_workers = 1 # We are niced, so just use a single worker. | |
1397 | + else: | |
1398 | + def_workers = 0 # zoooom.... | |
1399 | + | |
1400 | _def = yum.misc._default_checksums[0] | |
1401 | _avail = yum.misc._available_checksums | |
1402 | parser = OptionParser(version = "createrepo %s" % createrepo.__version__) | |
1403 | @@ -100,6 +106,8 @@ def parse_args(args, conf): | |
1404 | parser.add_option("--simple-md-filenames", dest="simple_md_filenames", | |
1405 | help="do not include the file's checksum in the filename, helps with proxies", | |
1406 | default=False, action="store_true") | |
1407 | + parser.add_option("--retain-old-md", default=0, type='int', dest='retain_old_md', | |
1408 | + help="keep around the latest (by timestamp) N copies of the old repodata") | |
1409 | parser.add_option("--distro", default=[], action="append", | |
1410 | help="distro tag and optional cpeid: --distro" "'cpeid,textname'") | |
1411 | parser.add_option("--content", default=[], dest='content_tags', | |
1412 | @@ -119,10 +127,15 @@ def parse_args(args, conf): | |
1413 | parser.add_option("--max-delta-rpm-size", default=100000000, | |
1414 | dest='max_delta_rpm_size', type='int', | |
1415 | help="max size of an rpm that to run deltarpm against (in bytes)") | |
1416 | - | |
1417 | - parser.add_option("--workers", default=1, | |
1418 | + parser.add_option("--workers", default=def_workers, | |
1419 | dest='workers', type='int', | |
1420 | help="number of workers to spawn to read rpms") | |
1421 | + parser.add_option("--xz", default=False, | |
1422 | + action="store_true", | |
1423 | + help="use xz for repodata compression") | |
1424 | + parser.add_option("--compress-type", default='compat', dest="compress_type", | |
1425 | + help="which compression type to use") | |
1426 | + | |
1427 | ||
1428 | (opts, argsleft) = parser.parse_args(args) | |
1429 | if len(argsleft) > 1 and not opts.split: | |
1430 | @@ -138,6 +151,9 @@ def parse_args(args, conf): | |
1431 | else: | |
1432 | directories = argsleft | |
1433 | ||
1434 | + if opts.workers >= 128: | |
1435 | + errorprint(_('Warning: More than 128 workers is a lot. Limiting.')) | |
1436 | + opts.workers = 128 | |
1437 | if opts.sumtype == 'sha1': | |
1438 | errorprint(_('Warning: It is more compatible to use sha instead of sha1')) | |
1439 | ||
1440 | @@ -155,6 +171,11 @@ def parse_args(args, conf): | |
1441 | ||
1442 | if opts.nodatabase: | |
1443 | opts.database = False | |
1444 | + | |
1445 | + # xz is just a shorthand for compress_type | |
1446 | + if opts.xz and opts.compress_type == 'compat': | |
1447 | + opts.compress_type='xz' | |
1448 | + | |
1449 | ||
1450 | # let's switch over to using the conf object - put all the opts into it | |
1451 | for opt in parser.option_list: | |
1452 | @@ -240,6 +261,7 @@ def main(args): | |
1453 | if mdgen.checkTimeStamps(): | |
1454 | if mdgen.conf.verbose: | |
1455 | print _('repo is up to date') | |
1456 | + mdgen._cleanup_tmp_repodata_dir() | |
1457 | sys.exit(0) | |
1458 | ||
1459 | if conf.profile: | |
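os.nice(0) adds zero to the process niceness and returns the result, which makes it a portable way to read the current value; genpkgmetadata.py uses that to default to a single worker when the process has been niced. The 0 default presumably means "autodetect" further down, likely via the num_cpus_online() helper added in the utils hunk above, though this commit does not show that consumer. A sketch:

    import os

    def default_workers():
        # os.nice(increment) returns the new niceness, so nice(0) reads it
        if os.nice(0) > 0:
            return 1    # we were niced: be polite, single worker
        return 0        # assumed sentinel for "one worker per online CPU"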
1460 | diff --git a/mergerepo.py b/mergerepo.py | |
1461 | index 05e5f5e..80cb1a8 100755 | |
1462 | --- a/mergerepo.py | |
1463 | +++ b/mergerepo.py | |
1464 | @@ -18,6 +18,7 @@ | |
1465 | ||
1466 | import sys | |
1467 | import createrepo.merge | |
1468 | +from createrepo.utils import MDError | |
1469 | from optparse import OptionParser | |
1470 | ||
1471 | #TODO: | |
1472 | @@ -47,6 +48,9 @@ def parse_args(args): | |
1473 | help="Do not merge group(comps) metadata") | |
1474 | parser.add_option("", "--noupdateinfo", default=False, action="store_true", | |
1475 | help="Do not merge updateinfo metadata") | |
1476 | + parser.add_option("--compress-type", default=None, dest="compress_type", | |
1477 | + help="which compression type to use") | |
1478 | + | |
1479 | (opts, argsleft) = parser.parse_args(args) | |
1480 | ||
1481 | if len(opts.repos) < 2: | |
1482 | @@ -77,9 +81,14 @@ def main(args): | |
1483 | rmbase.groups = False | |
1484 | if opts.noupdateinfo: | |
1485 | rmbase.updateinfo = False | |
1486 | - | |
1487 | - rmbase.merge_repos() | |
1488 | - rmbase.write_metadata() | |
1489 | - | |
1490 | + if opts.compress_type: | |
1491 | + rmbase.mdconf.compress_type = opts.compress_type | |
1492 | + try: | |
1493 | + rmbase.merge_repos() | |
1494 | + rmbase.write_metadata() | |
1495 | + except MDError, e: | |
1496 | + print >> sys.stderr, "Could not merge repos: %s" % e | |
1497 | + sys.exit(1) | |
1498 | + | |
1499 | if __name__ == "__main__": | |
1500 | main(sys.argv[1:]) | |
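The mergerepo change routes both merge_repos() and write_metadata() through one try block so a metadata failure exits with status 1 instead of a traceback. The surrounding call pattern looks roughly like this, with the constructor assumed from the unchanged part of the file (Python 2 syntax, matching the codebase):

    import sys
    import createrepo.merge
    from createrepo.utils import MDError

    rmbase = createrepo.merge.RepoMergeBase(['repo1/', 'repo2/'])  # assumed ctor
    rmbase.mdconf.compress_type = 'xz'    # new in this commit; optional
    try:
        rmbase.merge_repos()
        rmbase.write_metadata()
    except MDError, e:
        print >> sys.stderr, "Could not merge repos: %s" % e
        sys.exit(1)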
1501 | diff --git a/modifyrepo.py b/modifyrepo.py | |
1502 | index 17094a4..bf1eec0 100755 | |
1503 | --- a/modifyrepo.py | |
1504 | +++ b/modifyrepo.py | |
1505 | @@ -1,11 +1,15 @@ | |
1506 | #!/usr/bin/python | |
1507 | -# This tools is used to insert arbitrary metadata into an RPM repository. | |
1508 | +# This tool is used to manipulate arbitrary metadata in an RPM repository. | |
1509 | # Example: | |
1510 | # ./modifyrepo.py updateinfo.xml myrepo/repodata | |
1511 | +# or | |
1512 | +# ./modifyrepo.py --remove updateinfo.xml myrepo/repodata | |
1513 | # or in Python: | |
1514 | # >>> from modifyrepo import RepoMetadata | |
1515 | # >>> repomd = RepoMetadata('myrepo/repodata') | |
1516 | # >>> repomd.add('updateinfo.xml') | |
1517 | +# or | |
1518 | +# >>> repomd.remove('updateinfo.xml') | |
1519 | # | |
1520 | # This program is free software; you can redistribute it and/or modify | |
1521 | # it under the terms of the GNU General Public License as published by | |
1522 | @@ -20,11 +24,13 @@ | |
1523 | # (C) Copyright 2006 Red Hat, Inc. | |
1524 | # Luke Macken <lmacken@redhat.com> | |
1525 | # modified by Seth Vidal 2008 | |
1526 | +# modified by Daniel Mach 2011 | |
1527 | ||
1528 | import os | |
1529 | import sys | |
1530 | from createrepo import __version__ | |
1531 | -from createrepo.utils import checksum_and_rename, GzipFile, MDError | |
1532 | +from createrepo.utils import checksum_and_rename, compressOpen, MDError | |
1533 | +from createrepo.utils import _available_compression | |
1534 | from yum.misc import checksum | |
1535 | ||
1536 | from yum.repoMDObject import RepoMD, RepoMDError, RepoData | |
1537 | @@ -39,6 +45,8 @@ class RepoMetadata: | |
1538 | self.repodir = os.path.abspath(repo) | |
1539 | self.repomdxml = os.path.join(self.repodir, 'repomd.xml') | |
1540 | self.checksum_type = 'sha256' | |
1541 | + self.compress = False | |
1542 | + self.compress_type = _available_compression[-1] # best available | |
1543 | ||
1544 | if not os.path.exists(self.repomdxml): | |
1545 | raise MDError, '%s not found' % self.repomdxml | |
1546 | @@ -49,6 +57,35 @@ class RepoMetadata: | |
1547 | except RepoMDError, e: | |
1548 | raise MDError, 'Could not parse %s' % self.repomdxml | |
1549 | ||
1550 | + def _get_mdtype(self, mdname, mdtype=None): | |
1551 | + """ Get mdtype from existing mdtype or from a mdname. """ | |
1552 | + if mdtype: | |
1553 | + return mdtype | |
1554 | + return mdname.split('.')[0] | |
1555 | + | |
1556 | + def _print_repodata(self, repodata): | |
1557 | + """ Print repodata details. """ | |
1558 | + print " type =", repodata.type | |
1559 | + print " location =", repodata.location[1] | |
1560 | + print " checksum =", repodata.checksum[1] | |
1561 | + print " timestamp =", repodata.timestamp | |
1562 | + print " open-checksum =", repodata.openchecksum[1] | |
1563 | + | |
1564 | + def _write_repomd(self): | |
1565 | + """ Write the updated repomd.xml. """ | |
1566 | + outmd = file(self.repomdxml, 'w') | |
1567 | + outmd.write(self.repoobj.dump_xml()) | |
1568 | + outmd.close() | |
1569 | + print "Wrote:", self.repomdxml | |
1570 | + | |
1571 | + def _remove_repodata_file(self, repodata): | |
1572 | + """ Remove a file specified in repodata location """ | |
1573 | + try: | |
1574 | + os.remove(repodata.location[1]) | |
1575 | + except OSError, ex: | |
1576 | + if ex.errno != 2: | |
1577 | + # a missing file is fine; anything else becomes an MDError | |
1578 | + raise MDError("could not remove file %s" % repodata.location[1]) | |
1579 | ||
1580 | def add(self, metadata, mdtype=None): | |
1581 | """ Insert arbitrary metadata into this repository. | |
1582 | @@ -63,8 +100,8 @@ class RepoMetadata: | |
1583 | mdname = 'updateinfo.xml' | |
1584 | elif isinstance(metadata, str): | |
1585 | if os.path.exists(metadata): | |
1586 | - if metadata.endswith('.gz'): | |
1587 | - oldmd = GzipFile(filename=metadata, mode='rb') | |
1588 | + if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'): | |
1589 | + oldmd = compressOpen(metadata, mode='rb') | |
1590 | else: | |
1591 | oldmd = file(metadata, 'r') | |
1592 | md = oldmd.read() | |
1593 | @@ -75,14 +112,19 @@ class RepoMetadata: | |
1594 | else: | |
1595 | raise MDError, 'invalid metadata type' | |
1596 | ||
1597 | + do_compress = False | |
1598 | ## Compress the metadata and move it into the repodata | |
1599 | - if not mdname.endswith('.gz'): | |
1600 | - mdname += '.gz' | |
1601 | - if not mdtype: | |
1602 | - mdtype = mdname.split('.')[0] | |
1603 | - | |
1604 | + if self.compress or not mdname.split('.')[-1] in ('gz', 'bz2', 'xz'): | |
1605 | + do_compress = True | |
1606 | + mdname += '.' + self.compress_type | |
1607 | + mdtype = self._get_mdtype(mdname, mdtype) | |
1608 | + | |
1609 | destmd = os.path.join(self.repodir, mdname) | |
1610 | - newmd = GzipFile(filename=destmd, mode='wb') | |
1611 | + if do_compress: | |
1612 | + newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type) | |
1613 | + else: | |
1614 | + newmd = open(destmd, 'wb') | |
1615 | + | |
1616 | newmd.write(md) | |
1617 | newmd.close() | |
1618 | print "Wrote:", destmd | |
1619 | @@ -91,11 +133,8 @@ class RepoMetadata: | |
1620 | csum, destmd = checksum_and_rename(destmd, self.checksum_type) | |
1621 | base_destmd = os.path.basename(destmd) | |
1622 | ||
1623 | - | |
1624 | - ## Remove any stale metadata | |
1625 | - if mdtype in self.repoobj.repoData: | |
1626 | - del self.repoobj.repoData[mdtype] | |
1627 | - | |
1628 | + # Remove any stale metadata | |
1629 | + old_rd = self.repoobj.repoData.pop(mdtype, None) | |
1630 | ||
1631 | new_rd = RepoData() | |
1632 | new_rd.type = mdtype | |
1633 | @@ -105,18 +144,28 @@ class RepoMetadata: | |
1634 | new_rd.size = str(os.stat(destmd).st_size) | |
1635 | new_rd.timestamp = str(os.stat(destmd).st_mtime) | |
1636 | self.repoobj.repoData[new_rd.type] = new_rd | |
1637 | - | |
1638 | - print " type =", new_rd.type | |
1639 | - print " location =", new_rd.location[1] | |
1640 | - print " checksum =", new_rd.checksum[1] | |
1641 | - print " timestamp =", new_rd.timestamp | |
1642 | - print " open-checksum =", new_rd.openchecksum[1] | |
1643 | - | |
1644 | - ## Write the updated repomd.xml | |
1645 | - outmd = file(self.repomdxml, 'w') | |
1646 | - outmd.write(self.repoobj.dump_xml()) | |
1647 | - outmd.close() | |
1648 | - print "Wrote:", self.repomdxml | |
1649 | + self._print_repodata(new_rd) | |
1650 | + self._write_repomd() | |
1651 | + | |
1652 | + if old_rd is not None and old_rd.location[1] != new_rd.location[1]: | |
1653 | + # remove the old file when overwriting metadata | |
1654 | + # with the same mdtype but different location | |
1655 | + self._remove_repodata_file(old_rd) | |
1656 | + | |
1657 | + def remove(self, metadata, mdtype=None): | |
1658 | + """ Remove metadata from this repository. """ | |
1659 | + mdname = metadata | |
1660 | + mdtype = self._get_mdtype(mdname, mdtype) | |
1661 | + | |
1662 | + old_rd = self.repoobj.repoData.pop(mdtype, None) | |
1663 | + if old_rd is None: | |
1664 | + print "Metadata not found: %s" % mdtype | |
1665 | + return | |
1666 | + | |
1667 | + self._remove_repodata_file(old_rd) | |
1668 | + print "Removed:" | |
1669 | + self._print_repodata(old_rd) | |
1670 | + self._write_repomd() | |
1671 | ||
1672 | ||
1673 | def main(args): | |
1674 | @@ -124,7 +173,13 @@ def main(args): | |
1675 | # query options | |
1676 | parser.add_option("--mdtype", dest='mdtype', | |
1677 | help="specific datatype of the metadata, will be derived from the filename if not specified") | |
1678 | - parser.usage = "modifyrepo [options] <input_metadata> <output repodata>" | |
1679 | + parser.add_option("--remove", action="store_true", | |
1680 | + help="remove specified file from repodata") | |
1681 | + parser.add_option("--compress", action="store_true", default=False, | |
1682 | + help="compress the new repodata before adding it to the repo") | |
1683 | + parser.add_option("--compress-type", dest='compress_type', default='gz', | |
1684 | + help="compression format to use") | |
1685 | + parser.usage = "modifyrepo [options] [--remove] <input_metadata> <output_repodata>" | |
1686 | ||
1687 | (opts, argsleft) = parser.parse_args(args) | |
1688 | if len(argsleft) != 2: | |
1689 | @@ -137,11 +192,28 @@ def main(args): | |
1690 | except MDError, e: | |
1691 | print "Could not access repository: %s" % str(e) | |
1692 | return 1 | |
1693 | + | |
1694 | + | |
1695 | + repomd.compress = opts.compress | |
1696 | + if opts.compress_type in _available_compression: | |
1697 | + repomd.compress_type = opts.compress_type | |
1698 | + | |
1699 | + # remove | |
1700 | + if opts.remove: | |
1701 | + try: | |
1702 | + repomd.remove(metadata) | |
1703 | + except MDError, ex: | |
1704 | + print "Could not remove metadata: %s" % (metadata, str(ex)) | |
1705 | + return 1 | |
1706 | + return | |
1707 | + | |
1708 | + # add | |
1709 | try: | |
1710 | repomd.add(metadata, mdtype=opts.mdtype) | |
1711 | except MDError, e: | |
1712 | print "Could not add metadata from file %s: %s" % (metadata, str(e)) | |
1713 | return 1 | |
1714 | + | |
1715 | ||
1716 | if __name__ == '__main__': | |
1717 | ret = main(sys.argv[1:]) | |
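Putting the new modifyrepo pieces together, the Python-level usage promised in the header comment looks roughly like this (paths are placeholders):

    from modifyrepo import RepoMetadata

    repomd = RepoMetadata('myrepo/repodata')
    repomd.compress = True           # force compression even for already-compressed names
    repomd.compress_type = 'xz'      # must appear in _available_compression
    repomd.add('updateinfo.xml')     # writes updateinfo.xml.xz, updates repomd.xml
    repomd.remove('updateinfo.xml')  # new: drops the entry and deletes the file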
1718 | diff --git a/worker.py b/worker.py | |
1719 | index eb35ef7..fe6758f 100755 | |
1720 | --- a/worker.py | |
1721 | +++ b/worker.py | |
1722 | @@ -5,6 +5,7 @@ import yum | |
1723 | import createrepo | |
1724 | import os | |
1725 | import rpmUtils | |
1726 | +import re | |
1727 | from optparse import OptionParser | |
1728 | ||
1729 | ||
1730 | @@ -23,6 +24,8 @@ def main(args): | |
1731 | parser = OptionParser() | |
1732 | parser.add_option('--tmpmdpath', default=None, | |
1733 | help="path where the outputs should be dumped for this worker") | |
1734 | + parser.add_option('--pkglist', default=None, | |
1735 | + help="file to read the pkglist from in lieu of all of them on the cli") | |
1736 | parser.add_option("--pkgoptions", default=[], action='append', | |
1737 | help="pkgoptions in the format of key=value") | |
1738 | parser.add_option("--quiet", default=False, action='store_true', | |
1739 | @@ -36,10 +39,6 @@ def main(args): | |
1740 | opts, pkgs = parser.parse_args(args) | |
1741 | external_data = {'_packagenumber': 1} | |
1742 | globalopts = {} | |
1743 | - if not opts.tmpmdpath: | |
1744 | - print >> sys.stderr, "tmpmdpath required for destination files" | |
1745 | - sys.exit(1) | |
1746 | - | |
1747 | ||
1748 | for strs in opts.pkgoptions: | |
1749 | k,v = strs.split('=') | |
1750 | @@ -64,15 +63,34 @@ def main(args): | |
1751 | ||
1752 | reldir = external_data['_reldir'] | |
1753 | ts = rpmUtils.transaction.initReadOnlyTransaction() | |
1754 | - pri = open(opts.tmpmdpath + '/primary.xml' , 'w') | |
1755 | - fl = open(opts.tmpmdpath + '/filelists.xml' , 'w') | |
1756 | - other = open(opts.tmpmdpath + '/other.xml' , 'w') | |
1757 | - | |
1758 | - | |
1759 | + if opts.tmpmdpath: | |
1760 | + files = [open(opts.tmpmdpath + '/%s.xml' % i, 'w') | |
1761 | + for i in ('primary', 'filelists', 'other')] | |
1762 | + def output(*xml): | |
1763 | + for fh, buf in zip(files, xml): | |
1764 | + fh.write(buf) | |
1765 | + else: | |
1766 | + def output(*xml): | |
1767 | + buf = ' '.join(str(len(i)) for i in xml) | |
1768 | + sys.stdout.write('*** %s\n' % buf) | |
1769 | + for buf in xml: | |
1770 | + sys.stdout.write(buf) | |
1771 | + | |
1772 | + if opts.pkglist: | |
1773 | + for line in open(opts.pkglist,'r').readlines(): | |
1774 | + line = line.strip() | |
1775 | + if re.match('^\s*\#.*', line) or re.match('^\s*$', line): | |
1776 | + continue | |
1777 | + pkgs.append(line) | |
1778 | + | |
1779 | + clog_limit=globalopts.get('clog_limit', None) | |
1780 | + if clog_limit is not None: | |
1781 | + clog_limit = int(clog_limit) | |
1782 | for pkgfile in pkgs: | |
1783 | pkgpath = reldir + '/' + pkgfile | |
1784 | if not os.path.exists(pkgpath): | |
1785 | print >> sys.stderr, "File not found: %s" % pkgpath | |
1786 | + output() | |
1787 | continue | |
1788 | ||
1789 | try: | |
1790 | @@ -80,20 +98,17 @@ def main(args): | |
1791 | print "reading %s" % (pkgfile) | |
1792 | ||
1793 | pkg = createrepo.yumbased.CreateRepoPackage(ts, package=pkgpath, | |
1794 | - external_data=external_data) | |
1795 | - pri.write(pkg.xml_dump_primary_metadata()) | |
1796 | - fl.write(pkg.xml_dump_filelists_metadata()) | |
1797 | - other.write(pkg.xml_dump_other_metadata(clog_limit= | |
1798 | - globalopts.get('clog_limit', None))) | |
1799 | + sumtype=globalopts.get('sumtype', None), | |
1800 | + external_data=external_data) | |
1801 | + output(pkg.xml_dump_primary_metadata(), | |
1802 | + pkg.xml_dump_filelists_metadata(), | |
1803 | + pkg.xml_dump_other_metadata(clog_limit=clog_limit)) | |
1804 | except yum.Errors.YumBaseError, e: | |
1805 | print >> sys.stderr, "Error: %s" % e | |
1806 | + output() | |
1807 | continue | |
1808 | else: | |
1809 | external_data['_packagenumber']+=1 | |
1810 | ||
1811 | - pri.close() | |
1812 | - fl.close() | |
1813 | - other.close() | |
1814 | - | |
1815 | if __name__ == "__main__": | |
1816 | main(sys.argv[1:]) |
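With --tmpmdpath no longer a hard requirement, worker.py either writes the three metadata documents to per-type files or streams them to stdout behind a "*** <len> <len> <len>" header line so the parent process can split them apart; the bare output() calls on error emit an empty header, keeping the parent's bookkeeping in step with the package list. The stdout path in isolation:

    import sys

    def output(*xml):
        # the header carries the byte length of each document that follows;
        # with no arguments it is just "*** ", signalling a skipped package
        header = ' '.join(str(len(buf)) for buf in xml)
        sys.stdout.write('*** %s\n' % header)
        for buf in xml:
            sys.stdout.write(buf)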