1 diff --git a/createrepo.bash b/createrepo.bash
2 index 54ac8b2..f5a8bb7 100644
6 # bash completion for createrepo and friends
10 + COMPREPLY=( $( compgen -W "$( ${1:-createrepo} --compress-type=FOO / 2>&1 \
11 + | sed -ne 's/,/ /g' -ne 's/.*[Cc]ompression.*://p' )" -- "$2" ) )
19 - --version|-h|--help|-u|--baseurl|--distro|--content|--repo|--workers|\
20 + --version|-h|--help|-u|--baseurl|--distro|--content|--repo|\
21 --revision|-x|--excludes|--changelog-limit|--max-delta-rpm-size)
24 @@ -30,10 +36,24 @@ _cr_createrepo()
25 COMPREPLY=( $( compgen -f -o plusdirs -X '!*.rpm' -- "$2" ) )
29 + COMPREPLY=( $( compgen -W '0 1 2 3 4 5 6 7 8 9' -- "$2" ) )
33 COMPREPLY=( $( compgen -W '1 2 3 4 5 6 7 8 9' -- "$2" ) )
37 + local min=2 max=$( getconf _NPROCESSORS_ONLN 2>/dev/null )
38 + [[ -z $max || $max -lt $min ]] && max=$min
39 + COMPREPLY=( $( compgen -W "{1..$max}" -- "$2" ) )
43 + _cr_compress_type "$1" "$2"
48 if [[ $2 == -* ]] ; then
49 @@ -42,9 +62,9 @@ _cr_createrepo()
50 --cachedir --checkts --no-database --update --update-md-path
51 --skip-stat --split --pkglist --includepkg --outputdir
52 --skip-symlinks --changelog-limit --unique-md-filenames
53 - --simple-md-filenames --distro --content --repo --revision --deltas
54 - --oldpackagedirs --num-deltas --read-pkgs-list
55 - --max-delta-rpm-size --workers' -- "$2" ) )
56 + --simple-md-filenames --retain-old-md --distro --content --repo
57 + --revision --deltas --oldpackagedirs --num-deltas --read-pkgs-list
58 + --max-delta-rpm-size --workers --compress-type' -- "$2" ) )
60 COMPREPLY=( $( compgen -d -- "$2" ) )
62 @@ -63,10 +83,14 @@ _cr_mergerepo()
63 COMPREPLY=( $( compgen -d -- "$2" ) )
67 + _cr_compress_type "" "$2"
72 COMPREPLY=( $( compgen -W '--version --help --repo --archlist --no-database
73 - --outputdir --nogroups --noupdateinfo' -- "$2" ) )
74 + --outputdir --nogroups --noupdateinfo --compress-type' -- "$2" ) )
76 complete -F _cr_mergerepo -o filenames mergerepo mergerepo.py
78 @@ -78,17 +102,22 @@ _cr_modifyrepo()
79 --version|-h|--help|--mdtype)
83 + _cr_compress_type "" "$2"
88 if [[ $2 == -* ]] ; then
89 - COMPREPLY=( $( compgen -W '--version --help --mdtype' -- "$2" ) )
90 + COMPREPLY=( $( compgen -W '--version --help --mdtype --remove
91 + --compress --compress-type' -- "$2" ) )
96 for (( i=1; i < ${#COMP_WORDS[@]}-1; i++ )) ; do
97 if [[ ${COMP_WORDS[i]} != -* &&
98 - ${COMP_WORDS[i-1]} != @(=|--mdtype) ]]; then
99 + ${COMP_WORDS[i-1]} != @(=|--@(md|compress-)type) ]]; then
100 argnum=$(( argnum+1 ))
103 diff --git a/createrepo.spec b/createrepo.spec
104 index 1e491cd..eea7092 100644
105 --- a/createrepo.spec
106 +++ b/createrepo.spec
107 @@ -11,7 +11,7 @@ URL: http://createrepo.baseurl.org/
108 BuildRoot: %{_tmppath}/%{name}-%{version}root
109 BuildArchitectures: noarch
110 Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python
111 -Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm
112 +Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm, pyliblzma
115 This utility will generate a common metadata repository from a directory of
116 @@ -43,6 +43,9 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install
117 %{python_sitelib}/createrepo
120 +* Fri Sep 9 2011 Seth Vidal <skvidal at fedoraproject.org>
123 * Wed Jan 26 2011 Seth Vidal <skvidal at fedoraproject.org>
126 diff --git a/createrepo/__init__.py b/createrepo/__init__.py
127 index 8f2538e..1b18a9f 100644
128 --- a/createrepo/__init__.py
129 +++ b/createrepo/__init__.py
130 @@ -26,15 +26,16 @@ import tempfile
134 +from select import select
136 -from yum import misc, Errors, to_unicode
137 -from yum.repoMDObject import RepoMD, RepoMDError, RepoData
138 +from yum import misc, Errors
139 +from yum.repoMDObject import RepoMD, RepoData
140 from yum.sqlutils import executeSQL
141 from yum.packageSack import MetaSack
142 -from yum.packages import YumAvailablePackage, YumLocalPackage
143 +from yum.packages import YumAvailablePackage
145 import rpmUtils.transaction
146 -from utils import _, errorprint, MDError
147 +from utils import _, errorprint, MDError, lzma, _available_compression
150 import sqlite3 as sqlite
151 @@ -46,8 +47,9 @@ try:
155 -from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \
156 +from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \
157 checksum_and_rename, split_list_into_equal_chunks
158 +from utils import num_cpus_online
161 __version__ = '0.9.9'
162 @@ -74,7 +76,7 @@ class MetaDataConfig(object):
164 self.delta_relative = 'drpms/'
165 self.oldpackage_paths = [] # where to look for the old packages -
166 - self.deltafile = 'prestodelta.xml.gz'
167 + self.deltafile = 'prestodelta.xml'
168 self.num_deltas = 1 # number of older versions to delta (max)
169 self.max_delta_rpm_size = 100000000
170 self.update_md_path = None
171 @@ -86,9 +88,9 @@ class MetaDataConfig(object):
172 self.skip_symlinks = False
174 self.database_only = False
175 - self.primaryfile = 'primary.xml.gz'
176 - self.filelistsfile = 'filelists.xml.gz'
177 - self.otherfile = 'other.xml.gz'
178 + self.primaryfile = 'primary.xml'
179 + self.filelistsfile = 'filelists.xml'
180 + self.otherfile = 'other.xml'
181 self.repomdfile = 'repomd.xml'
182 self.tempdir = '.repodata'
183 self.finaldir = 'repodata'
184 @@ -108,8 +110,10 @@ class MetaDataConfig(object):
185 self.collapse_glibc_requires = True
186 self.workers = 1 # number of workers to fork off to grab metadata from the pkgs
187 self.worker_cmd = '/usr/share/createrepo/worker.py'
189 #self.worker_cmd = './worker.py' # helpful when testing
190 + self.retain_old_md = 0
191 + self.compress_type = 'compat'
194 class SimpleMDCallBack(object):
195 def errorlog(self, thing):
196 @@ -141,10 +145,23 @@ class MetaDataGenerator:
198 self.rpmlib_reqs = {}
200 + self.compat_compress = False
202 if not self.conf.directory and not self.conf.directories:
203 raise MDError, "No directory given on which to run."
206 + if self.conf.compress_type == 'compat':
207 + self.compat_compress = True
208 + self.conf.compress_type = None
210 + if not self.conf.compress_type:
211 + self.conf.compress_type = 'gz'
213 + if self.conf.compress_type not in utils._available_compression:
214 + raise MDError, "Compression %s not available: Please choose from: %s" \
215 + % (self.conf.compress_type, ', '.join(utils._available_compression))
218 if not self.conf.directories: # just makes things easier later
219 self.conf.directories = [self.conf.directory]
220 if not self.conf.directory: # ensure we have both in the config object
221 @@ -290,14 +307,13 @@ class MetaDataGenerator:
223 def extension_visitor(filelist, dirname, names):
225 + fn = os.path.join(dirname, fn)
226 if os.path.isdir(fn):
228 if self.conf.skip_symlinks and os.path.islink(fn):
230 elif fn[-extlen:].lower() == '%s' % (ext):
231 - relativepath = dirname.replace(startdir, "", 1)
232 - relativepath = relativepath.lstrip("/")
233 - filelist.append(os.path.join(relativepath, fn))
234 + filelist.append(fn[len(startdir):])
237 startdir = directory + '/'
238 @@ -311,7 +327,7 @@ class MetaDataGenerator:
239 def checkTimeStamps(self):
240 """check the timestamp of our target dir. If it is not newer than
241 the repodata return False, else True"""
242 - if self.conf.checkts:
243 + if self.conf.checkts and self.conf.mdtimestamp:
244 dn = os.path.join(self.conf.basedir, self.conf.directory)
245 files = self.getFileList(dn, '.rpm')
246 files = self.trimRpms(files)
247 @@ -410,9 +426,11 @@ class MetaDataGenerator:
249 def _setupPrimary(self):
250 # setup the primary metadata file
251 + # FIXME - make this be conf.compress_type once y-m-p is fixed
252 + fpz = self.conf.primaryfile + '.' + 'gz'
253 primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
254 - self.conf.primaryfile)
255 - fo = _gzipOpen(primaryfilepath, 'w')
257 + fo = compressOpen(primaryfilepath, 'w', 'gz')
258 fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
259 fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \
260 ' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' %
261 @@ -421,9 +439,11 @@ class MetaDataGenerator:
263 def _setupFilelists(self):
264 # setup the filelist file
265 + # FIXME - make this be conf.compress_type once y-m-p is fixed
266 + fpz = self.conf.filelistsfile + '.' + 'gz'
267 filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir,
268 - self.conf.filelistsfile)
269 - fo = _gzipOpen(filelistpath, 'w')
271 + fo = compressOpen(filelistpath, 'w', 'gz')
272 fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
273 fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \
274 ' packages="%s">' % self.pkgcount)
275 @@ -431,9 +451,11 @@ class MetaDataGenerator:
277 def _setupOther(self):
278 # setup the other file
279 + # FIXME - make this be conf.compress_type once y-m-p is fixed
280 + fpz = self.conf.otherfile + '.' + 'gz'
281 otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
282 - self.conf.otherfile)
283 - fo = _gzipOpen(otherfilepath, 'w')
285 + fo = compressOpen(otherfilepath, 'w', 'gz')
286 fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
287 fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \
289 @@ -442,9 +464,10 @@ class MetaDataGenerator:
291 def _setupDelta(self):
292 # setup the other file
293 + fpz = self.conf.deltafile + '.' + self.conf.compress_type
294 deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
295 - self.conf.deltafile)
296 - fo = _gzipOpen(deltafilepath, 'w')
298 + fo = compressOpen(deltafilepath, 'w', self.conf.compress_type)
299 fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
300 fo.write('<prestodelta>\n')
302 @@ -520,6 +543,7 @@ class MetaDataGenerator:
303 # go on their merry way
308 # if we're in --update mode then only act on the new/changed pkgs
310 @@ -530,39 +554,13 @@ class MetaDataGenerator:
312 if pkg.find("://") != -1:
313 old_pkg = os.path.basename(pkg)
314 - nodes = self.oldData.getNodes(old_pkg)
315 - if nodes is not None: # we have a match in the old metadata
316 + old_po = self.oldData.getNodes(old_pkg)
317 + if old_po: # we have a match in the old metadata
318 if self.conf.verbose:
319 self.callback.log(_("Using data from old metadata for %s")
321 - (primarynode, filenode, othernode) = nodes
323 - for node, outfile in ((primarynode, self.primaryfile),
324 - (filenode, self.flfile),
325 - (othernode, self.otherfile)):
329 - if self.conf.baseurl:
330 - anode = node.children
331 - while anode is not None:
332 - if anode.type != "element":
335 - if anode.name == "location":
336 - anode.setProp('xml:base', self.conf.baseurl)
339 - output = node.serialize('UTF-8', self.conf.pretty)
341 - outfile.write(output)
343 - if self.conf.verbose:
344 - self.callback.log(_("empty serialize on write to" \
345 - "%s in %s") % (outfile, pkg))
346 - outfile.write('\n')
348 - self.oldData.freeNodes(pkg)
349 + keptpkgs.append((pkg, old_po))
351 #FIXME - if we're in update and we have deltas enabled
352 # check the presto data for this pkg and write its info back out
354 @@ -584,32 +582,45 @@ class MetaDataGenerator:
356 if isinstance(pkg, YumAvailablePackage):
358 - self.read_pkgs.append(po.localpath)
359 + self.read_pkgs.append(po.localPkg())
361 # if we're dealing with remote pkgs - pitch it over to doing
362 # them one at a time, for now.
363 elif pkg.find('://') != -1:
364 - po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
365 + po = self.read_in_package(pkg, pkgpath=pkgpath, reldir=reldir)
366 self.read_pkgs.append(pkg)
369 - self.primaryfile.write(po.xml_dump_primary_metadata())
370 - self.flfile.write(po.xml_dump_filelists_metadata())
371 - self.otherfile.write(po.xml_dump_other_metadata(
372 - clog_limit=self.conf.changelog_limit))
373 + keptpkgs.append((pkg, po))
380 + keptpkgs.sort(reverse=True)
381 +        # keptpkgs is a list of (filename, po), pkgfiles is a list of filenames.
382 + # Need to write them in sorted(filename) order. We loop over pkgfiles,
383 + # inserting keptpkgs in right spots (using the upto argument).
384 + def save_keptpkgs(upto):
385 + while keptpkgs and (upto is None or keptpkgs[-1][0] < upto):
386 + filename, po = keptpkgs.pop()
387 + # reset baseurl in the old pkg
388 + po.basepath = self.conf.baseurl
389 + self.primaryfile.write(po.xml_dump_primary_metadata())
390 + self.flfile.write(po.xml_dump_filelists_metadata())
391 + self.otherfile.write(po.xml_dump_other_metadata(
392 + clog_limit=self.conf.changelog_limit))
395 # divide that list by the number of workers and fork off that many
397 # waitfor the workers to finish and as each one comes in
398 # open the files they created and write them out to our metadata
399 # add up the total pkg counts and return that value
400 - worker_tmp_path = tempfile.mkdtemp()
401 - worker_chunks = utils.split_list_into_equal_chunks(pkgfiles, self.conf.workers)
402 + self._worker_tmp_path = tempfile.mkdtemp() # setting this in the base object so we can clean it up later
403 + if self.conf.workers < 1:
404 + self.conf.workers = num_cpus_online()
406 + worker_chunks = split_list_into_equal_chunks(pkgfiles, self.conf.workers)
409 base_worker_cmdline = [self.conf.worker_cmd,
410 @@ -617,7 +628,8 @@ class MetaDataGenerator:
411 '--pkgoptions=_collapse_libc_requires=%s' % self.conf.collapse_glibc_requires,
412 '--pkgoptions=_cachedir=%s' % self.conf.cachedir,
413 '--pkgoptions=_baseurl=%s' % self.conf.baseurl,
414 - '--globalopts=clog_limit=%s' % self.conf.changelog_limit,]
415 + '--globalopts=clog_limit=%s' % self.conf.changelog_limit,
416 + '--globalopts=sumtype=%s' % self.conf.sumtype, ]
419 base_worker_cmdline.append('--quiet')
420 @@ -626,15 +638,14 @@ class MetaDataGenerator:
421 base_worker_cmdline.append('--verbose')
423 for worker_num in range(self.conf.workers):
424 - # make the worker directory
425 + pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num
427 + f.write('\n'.join(worker_chunks[worker_num]))
431 workercmdline.extend(base_worker_cmdline)
432 - thisdir = worker_tmp_path + '/' + str(worker_num)
433 - if checkAndMakeDir(thisdir):
434 - workercmdline.append('--tmpmdpath=%s' % thisdir)
436 - raise MDError, "Unable to create worker path: %s" % thisdir
437 - workercmdline.extend(worker_chunks[worker_num])
438 + workercmdline.append('--pkglist=%s/pkglist-%s' % (self._worker_tmp_path, worker_num))
439 worker_cmd_dict[worker_num] = workercmdline
442 @@ -647,49 +658,60 @@ class MetaDataGenerator:
443 stderr=subprocess.PIPE)
444 worker_jobs[num] = job
447 - while gimmebreak != len(worker_jobs.keys()):
449 - for (num,job) in worker_jobs.items():
450 - if job.poll() is not None:
452 - line = job.stdout.readline()
454 + files = self.primaryfile, self.flfile, self.otherfile
455 + def log_messages(num):
456 + job = worker_jobs[num]
458 + # check stdout and stderr
459 + for stream in select((job.stdout, job.stderr), (), ())[0]:
460 + line = stream.readline()
464 + if stream is job.stdout:
465 + if line.startswith('*** '):
466 + # get data, save to local files
467 + for out, size in zip(files, line[4:].split()):
468 + out.write(stream.read(int(size)))
470 self.callback.log('Worker %s: %s' % (num, line.rstrip()))
471 - line = job.stderr.readline()
474 self.callback.errorlog('Worker %s: %s' % (num, line.rstrip()))
476 + for i, pkg in enumerate(pkgfiles):
477 + # insert cached packages
480 + # save output to local files
481 + log_messages(i % self.conf.workers)
483 + for (num, job) in worker_jobs.items():
484 + # process remaining messages on stderr
487 + if job.wait() != 0:
488 + msg = "Worker exited with non-zero value: %s. Fatal." % job.returncode
489 + self.callback.errorlog(msg)
493 if not self.conf.quiet:
494 self.callback.log("Workers Finished")
495 - # finished with workers
496 - # go to their dirs and add the contents
497 - if not self.conf.quiet:
498 - self.callback.log("Gathering worker results")
499 - for num in range(self.conf.workers):
500 - for (fn, fo) in (('primary.xml', self.primaryfile),
501 - ('filelists.xml', self.flfile),
502 - ('other.xml', self.otherfile)):
503 - fnpath = worker_tmp_path + '/' + str(num) + '/' + fn
504 - if os.path.exists(fnpath):
505 - fo.write(open(fnpath, 'r').read())
508 for pkgfile in pkgfiles:
510 - po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
511 - self._do_delta_rpm_package(po)
513 + po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
514 + self._do_delta_rpm_package(po)
518 self.read_pkgs.append(pkgfile)
520 + save_keptpkgs(None) # append anything left
521 return self.current_pkg
524 def closeMetadataDocs(self):
525 - if not self.conf.quiet:
526 - self.callback.log('')
529 # save them up to the tmp locations:
530 if not self.conf.quiet:
531 self.callback.log(_('Saving Primary metadata'))
532 @@ -784,7 +806,6 @@ class MetaDataGenerator:
533 return self._old_package_dict
535 self._old_package_dict = {}
537 for d in self.conf.oldpackage_paths:
538 for f in self.getFileList(d, '.rpm'):
540 @@ -833,7 +854,7 @@ class MetaDataGenerator:
541 return ' '.join(results)
543 def _createRepoDataObject(self, mdfile, mdtype, compress=True,
544 - compress_type='gzip', attribs={}):
545 + compress_type=None, attribs={}):
546 """return random metadata as RepoData object to be added to RepoMD
547 mdfile = complete path to file
548 mdtype = the metadata type to use
549 @@ -843,15 +864,13 @@ class MetaDataGenerator:
550 sfile = os.path.basename(mdfile)
551 fo = open(mdfile, 'r')
552 outdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
553 + if not compress_type:
554 + compress_type = self.conf.compress_type
556 - if compress_type == 'gzip':
557 - sfile = '%s.gz' % sfile
558 - outfn = os.path.join(outdir, sfile)
559 - output = GzipFile(filename = outfn, mode='wb')
560 - elif compress_type == 'bzip2':
561 - sfile = '%s.bz2' % sfile
562 - outfn = os.path.join(outdir, sfile)
563 - output = BZ2File(filename = outfn, mode='wb')
564 + sfile = '%s.%s' % (sfile, compress_type)
565 + outfn = os.path.join(outdir, sfile)
566 + output = compressOpen(outfn, mode='wb', compress_type=compress_type)
569 outfn = os.path.join(outdir, sfile)
570 output = open(outfn, 'w')
571 @@ -874,14 +893,13 @@ class MetaDataGenerator:
573 thisdata = RepoData()
574 thisdata.type = mdtype
576 thisdata.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, sfile))
577 thisdata.checksum = (self.conf.sumtype, csum)
579 thisdata.openchecksum = (self.conf.sumtype, open_csum)
581 thisdata.size = str(os.stat(outfn).st_size)
582 - thisdata.timestamp = str(os.stat(outfn).st_mtime)
583 + thisdata.timestamp = str(int(os.stat(outfn).st_mtime))
584 for (k, v) in attribs.items():
585 setattr(thisdata, k, str(v))
587 @@ -925,9 +943,14 @@ class MetaDataGenerator:
588 rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None)
590 for (rpm_file, ftype) in workfiles:
591 + # when we fix y-m-p and non-gzipped xml files - then we can make this just add
592 + # self.conf.compress_type
593 + if ftype in ('other', 'filelists', 'primary'):
594 + rpm_file = rpm_file + '.' + 'gz'
595 + elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression:
596 + rpm_file = rpm_file + '.' + self.conf.compress_type
597 complete_path = os.path.join(repopath, rpm_file)
599 - zfo = _gzipOpen(complete_path)
600 + zfo = compressOpen(complete_path)
601 # This is misc.checksum() done locally so we can get the size too.
602 data = misc.Checksums([sumtype])
603 while data.read(zfo, 2**16):
604 @@ -966,14 +989,20 @@ class MetaDataGenerator:
605 good_name = '%s.sqlite' % ftype
606 resultpath = os.path.join(repopath, good_name)
608 + # compat compression for rhel5 compatibility from fedora :(
609 + compress_type = self.conf.compress_type
610 + if self.compat_compress:
611 + compress_type = 'bz2'
613 # rename from silly name to not silly name
614 os.rename(tmp_result_path, resultpath)
615 - compressed_name = '%s.bz2' % good_name
616 + compressed_name = '%s.%s' % (good_name, compress_type)
617 result_compressed = os.path.join(repopath, compressed_name)
618 db_csums[ftype] = misc.checksum(sumtype, resultpath)
621 - bzipFile(resultpath, result_compressed)
623 + compressFile(resultpath, result_compressed, compress_type)
624 # csum the compressed file
625 db_compressed_sums[ftype] = misc.checksum(sumtype,
627 @@ -983,8 +1012,8 @@ class MetaDataGenerator:
628 os.unlink(resultpath)
630 if self.conf.unique_md_filenames:
631 - csum_compressed_name = '%s-%s.bz2' % (
632 - db_compressed_sums[ftype], good_name)
633 + csum_compressed_name = '%s-%s.%s' % (
634 + db_compressed_sums[ftype], good_name, compress_type)
635 csum_result_compressed = os.path.join(repopath,
636 csum_compressed_name)
637 os.rename(result_compressed, csum_result_compressed)
638 @@ -1001,7 +1030,7 @@ class MetaDataGenerator:
639 data.location = (self.conf.baseurl,
640 os.path.join(self.conf.finaldir, compressed_name))
641 data.checksum = (sumtype, db_compressed_sums[ftype])
642 - data.timestamp = str(db_stat.st_mtime)
643 + data.timestamp = str(int(db_stat.st_mtime))
644 data.size = str(db_stat.st_size)
645 data.opensize = str(un_stat.st_size)
646 data.openchecksum = (sumtype, db_csums[ftype])
647 @@ -1020,7 +1049,13 @@ class MetaDataGenerator:
648 data.openchecksum = (sumtype, uncsum)
650 if self.conf.unique_md_filenames:
651 - res_file = '%s-%s.xml.gz' % (csum, ftype)
652 + if ftype in ('primary', 'filelists', 'other'):
655 + compress = self.conf.compress_type
657 + main_name = '.'.join(rpm_file.split('.')[:-1])
658 + res_file = '%s-%s.%s' % (csum, main_name, compress)
659 orig_file = os.path.join(repopath, rpm_file)
660 dest_file = os.path.join(repopath, res_file)
661 os.rename(orig_file, dest_file)
662 @@ -1046,7 +1081,7 @@ class MetaDataGenerator:
665 if self.conf.additional_metadata:
666 - for md_type, mdfile in self.conf.additional_metadata.items():
667 + for md_type, md_file in self.conf.additional_metadata.items():
668 mdcontent = self._createRepoDataObject(md_file, md_type)
669 repomd.repoData[mdcontent.type] = mdcontent
671 @@ -1110,23 +1145,43 @@ class MetaDataGenerator:
673 'Could not remove old metadata file: %s: %s') % (oldfile, e)
675 - # Move everything else back from olddir (eg. repoview files)
677 - old_contents = os.listdir(output_old_dir)
678 - except (OSError, IOError), e:
688 for f in os.listdir(output_old_dir):
689 oldfile = os.path.join(output_old_dir, f)
690 finalfile = os.path.join(output_final_dir, f)
691 - if f.find('-') != -1 and f.split('-')[1] in ('primary.sqlite.bz2',
692 - 'filelists.sqlite.bz2', 'primary.xml.gz','other.sqlite.bz2',
693 - 'other.xml.gz','filelists.xml.gz'):
694 - os.remove(oldfile) # kill off the old ones
696 - if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2',
697 - 'primary.sqlite.bz2'):
700 + for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr),
701 + ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl),
702 + ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)):
703 + fn = '.'.join(f.split('.')[:-1])
704 + if fn.endswith(end):
705 + lst.append(oldfile)
708 + # make a list of the old metadata files we don't want to remove.
709 + for lst in (old_pr, old_fl, old_ot, old_pr_db, old_fl_db, old_ot_db):
710 + sortlst = sorted(lst, key=lambda x: os.path.getmtime(x),
712 + for thisf in sortlst[self.conf.retain_old_md:]:
713 + old_to_remove.append(thisf)
715 + for f in os.listdir(output_old_dir):
716 + oldfile = os.path.join(output_old_dir, f)
717 + finalfile = os.path.join(output_final_dir, f)
718 + fn = '.'.join(f.split('.')[:-1])
719 + if fn in ('filelists.sqlite', 'other.sqlite',
720 + 'primary.sqlite') or oldfile in old_to_remove:
723 + except (OSError, IOError), e:
725 + 'Could not remove old metadata file: %s: %s') % (oldfile, e)
728 if os.path.exists(finalfile):
729 @@ -1147,14 +1202,19 @@ class MetaDataGenerator:
730 msg += _('Error was %s') % e
734 - os.rmdir(output_old_dir)
736 - self.errorlog(_('Could not remove old metadata dir: %s')
737 - % self.conf.olddir)
738 - self.errorlog(_('Error was %s') % e)
739 - self.errorlog(_('Please clean up this directory manually.'))
740 + self._cleanup_tmp_repodata_dir()
741 + self._cleanup_update_tmp_dir()
742 + self._write_out_read_pkgs_list()
745 + def _cleanup_update_tmp_dir(self):
746 + if not self.conf.update:
749 + shutil.rmtree(self.oldData._repo.basecachedir, ignore_errors=True)
750 + shutil.rmtree(self.oldData._repo.base_persistdir, ignore_errors=True)
752 + def _write_out_read_pkgs_list(self):
753 # write out the read_pkgs_list file with self.read_pkgs
754 if self.conf.read_pkgs_list:
756 @@ -1167,6 +1227,23 @@ class MetaDataGenerator:
757 % self.conf.read_pkgs_list)
758 self.errorlog(_('Error was %s') % e)
760 + def _cleanup_tmp_repodata_dir(self):
761 + output_old_dir = os.path.join(self.conf.outputdir, self.conf.olddir)
762 + output_temp_dir = os.path.join(self.conf.outputdir, self.conf.tempdir)
763 + for dirbase in (self.conf.olddir, self.conf.tempdir):
764 + dirpath = os.path.join(self.conf.outputdir, dirbase)
765 + if os.path.exists(dirpath):
769 + self.errorlog(_('Could not remove temp metadata dir: %s')
771 + self.errorlog(_('Error was %s') % e)
772 + self.errorlog(_('Please clean up this directory manually.'))
773 + # our worker tmp path
774 + if hasattr(self, '_worker_tmp_path') and os.path.exists(self._worker_tmp_path):
775 + shutil.rmtree(self._worker_tmp_path, ignore_errors=True)
777 def setup_sqlite_dbs(self, initdb=True):
778 """sets up the sqlite dbs w/table schemas and db_infos"""
779 destdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
780 @@ -1194,24 +1271,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
781 (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
782 return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))
784 - def getFileList(self, directory, ext):
788 - def extension_visitor(arg, dirname, names):
790 - if os.path.isdir(fn):
792 - elif fn[-extlen:].lower() == '%s' % (ext):
793 - reldir = os.path.basename(dirname)
794 - if reldir == os.path.basename(directory):
796 - arg.append(os.path.join(reldir, fn))
799 - os.path.walk(directory, extension_visitor, rpmlist)
802 def doPkgMetadata(self):
803 """all the heavy lifting for the package metadata"""
804 if len(self.conf.directories) == 1:
805 @@ -1232,6 +1291,19 @@ class SplitMetaDataGenerator(MetaDataGenerator):
806 thisdir = os.path.join(self.conf.basedir, mydir)
808 filematrix[mydir] = self.getFileList(thisdir, '.rpm')
810 + # pkglist is a bit different for split media, as we have to know
811 + # which dir. it belongs to. So we walk the dir. and then filter.
812 + # We could be faster by not walking the dir. ... but meh.
813 + if self.conf.pkglist:
814 + pkglist = set(self.conf.pkglist)
816 + for fname in filematrix[mydir]:
817 + if fname not in pkglist:
820 + filematrix[mydir] = pkgs
822 self.trimRpms(filematrix[mydir])
823 self.pkgcount += len(filematrix[mydir])
825 @@ -1240,7 +1312,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
826 self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
828 self.openMetadataDocs()
829 - original_basedir = self.conf.basedir
830 for mydir in self.conf.directories:
831 self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
832 self.writeMetadataDocs(filematrix[mydir], mydir)
833 diff --git a/createrepo/merge.py b/createrepo/merge.py
834 index b3b2ea1..1ac43bb 100644
835 --- a/createrepo/merge.py
836 +++ b/createrepo/merge.py
837 @@ -24,6 +24,7 @@ from yum.misc import unique, getCacheDir
841 +from utils import MDError
845 @@ -84,6 +85,8 @@ class RepoMergeBase:
848 for r in self.repolist:
850 + r = 'file://' + r # just fix the file repos, this is silly.
852 rid = 'repo%s' % count
853 n = self.yumbase.add_enable_repo(rid, baseurls=[r],
854 @@ -92,7 +95,10 @@ class RepoMergeBase:
855 n._merge_rank = count
858 - self.yumbase._getSacks(archlist=self.archlist)
860 + self.yumbase._getSacks(archlist=self.archlist)
861 + except yum.Errors.RepoError, e:
862 + raise MDError, "Could not setup merge repo pkgsack: %s" % e
864 myrepos = self.yumbase.repos.listEnabled()
866 @@ -102,11 +108,16 @@ class RepoMergeBase:
867 def write_metadata(self, outputdir=None):
868 mytempdir = tempfile.mkdtemp()
870 - comps_fn = mytempdir + '/groups.xml'
871 - compsfile = open(comps_fn, 'w')
872 - compsfile.write(self.yumbase.comps.xml())
874 - self.mdconf.groupfile=comps_fn
876 + comps_fn = mytempdir + '/groups.xml'
877 + compsfile = open(comps_fn, 'w')
878 + compsfile.write(self.yumbase.comps.xml())
880 + except yum.Errors.GroupsError, e:
881 + # groups not being available shouldn't be a fatal error
884 + self.mdconf.groupfile=comps_fn
887 ui_fn = mytempdir + '/updateinfo.xml'
888 diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
889 index 27d3690..54863cb 100644
890 --- a/createrepo/readMetadata.py
891 +++ b/createrepo/readMetadata.py
893 # Copyright 2006 Red Hat
898 from utils import errorprint, _
900 -from yum import repoMDObject
902 +from yum import misc
903 +from yum.Errors import YumBaseError
905 +class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite):
906 + # special for special people like us.
907 + def _return_remote_location(self):
910 + msg = """<location xml:base="%s" href="%s"/>\n""" % (
911 + misc.to_xml(self.basepath, attrib=True),
912 + misc.to_xml(self.relativepath, attrib=True))
914 + msg = """<location href="%s"/>\n""" % misc.to_xml(self.relativepath, attrib=True)
919 class MetadataIndex(object):
920 @@ -30,178 +44,72 @@ class MetadataIndex(object):
923 self.outputdir = outputdir
924 + realpath = os.path.realpath(outputdir)
925 repodatadir = self.outputdir + '/repodata'
926 - myrepomdxml = repodatadir + '/repomd.xml'
927 - if os.path.exists(myrepomdxml):
928 - repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
929 - b = repomd.getData('primary').location[1]
930 - f = repomd.getData('filelists').location[1]
931 - o = repomd.getData('other').location[1]
932 - basefile = os.path.join(self.outputdir, b)
933 - filelistfile = os.path.join(self.outputdir, f)
934 - otherfile = os.path.join(self.outputdir, o)
936 - basefile = filelistfile = otherfile = ""
938 - self.files = {'base' : basefile,
939 - 'filelist' : filelistfile,
940 - 'other' : otherfile}
942 + self._repo = yum.yumRepo.YumRepository('garbageid')
943 + self._repo.baseurl = 'file://' + realpath
944 + self._repo.basecachedir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo")
945 + self._repo.base_persistdir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo-p")
946 + self._repo.metadata_expire = 1
947 + self._repo.gpgcheck = 0
948 + self._repo.repo_gpgcheck = 0
949 + self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld)
950 + self.pkg_tups_by_path = {}
953 + except YumBaseError, e:
954 + print "Could not find valid repo at: %s" % self.outputdir
958 - """Read in and index old repo data"""
959 - self.basenodes = {}
960 - self.filesnodes = {}
961 - self.othernodes = {}
963 + """Read in old repodata"""
964 if self.opts.get('verbose'):
965 print _("Scanning old repo data")
966 - for fn in self.files.values():
967 - if not os.path.exists(fn):
969 - errorprint(_("Warning: Old repodata file missing: %s") % fn)
971 - root = libxml2.parseFile(self.files['base']).getRootElement()
972 - self._scanPackageNodes(root, self._handleBase)
973 - if self.opts.get('verbose'):
974 - print _("Indexed %i base nodes" % len(self.basenodes))
975 - root = libxml2.parseFile(self.files['filelist']).getRootElement()
976 - self._scanPackageNodes(root, self._handleFiles)
977 - if self.opts.get('verbose'):
978 - print _("Indexed %i filelist nodes" % len(self.filesnodes))
979 - root = libxml2.parseFile(self.files['other']).getRootElement()
980 - self._scanPackageNodes(root, self._handleOther)
981 - if self.opts.get('verbose'):
982 - print _("Indexed %i other nodes" % len(self.othernodes))
983 - #reverse index pkg ids to track references
985 - for relpath, pkgid in self.pkg_ids.iteritems():
986 - self.pkgrefs.setdefault(pkgid,[]).append(relpath)
988 - def _scanPackageNodes(self, root, handler):
989 - node = root.children
990 - while node is not None:
991 - if node.type != "element":
993 + self._repo.sack.populate(self._repo, 'all', None, False)
994 + for thispo in self._repo.sack:
995 + mtime = thispo.filetime
997 + relpath = thispo.relativepath
998 + do_stat = self.opts.get('do_stat', True)
1000 + print _("mtime missing for %s") % relpath
1002 - if node.name == "package":
1006 - def _handleBase(self, node):
1008 - node = node.children
1013 - do_stat = self.opts.get('do_stat', True)
1014 - while node is not None:
1015 - if node.type != "element":
1018 + print _("size missing for %s") % relpath
1020 - if node.name == "checksum":
1021 - pkgid = node.content
1022 - elif node.name == "time":
1023 - mtime = int(node.prop('file'))
1024 - elif node.name == "size":
1025 - size = int(node.prop('package'))
1026 - elif node.name == "location":
1027 - relpath = node.prop('href')
1029 - if relpath is None:
1030 - print _("Incomplete data for node")
1033 - print _("pkgid missing for %s") % relpath
1036 - print _("mtime missing for %s") % relpath
1039 - print _("size missing for %s") % relpath
1042 - filepath = os.path.join(self.opts['pkgdir'], relpath)
1044 - st = os.stat(filepath)
1046 - #file missing -- ignore
1048 - if not stat.S_ISREG(st.st_mode):
1051 - #check size and mtime
1052 - if st.st_size != size:
1053 - if self.opts.get('verbose'):
1054 - print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
1056 - if int(st.st_mtime) != mtime:
1057 - if self.opts.get('verbose'):
1058 - print _("Modification time changed for %s") % filepath
1060 - #otherwise we index
1061 - self.basenodes[relpath] = top
1062 - self.pkg_ids[relpath] = pkgid
1064 - def _handleFiles(self, node):
1065 - pkgid = node.prop('pkgid')
1067 - self.filesnodes[pkgid] = node
1069 - def _handleOther(self, node):
1070 - pkgid = node.prop('pkgid')
1072 - self.othernodes[pkgid] = node
1074 + filepath = os.path.join(self.opts['pkgdir'], relpath)
1076 + st = os.stat(filepath)
1078 + #file missing -- ignore
1080 + if not stat.S_ISREG(st.st_mode):
1083 + #check size and mtime
1084 + if st.st_size != size:
1085 + if self.opts.get('verbose'):
1086 + print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
1088 + if int(st.st_mtime) != mtime:
1089 + if self.opts.get('verbose'):
1090 + print _("Modification time changed for %s") % filepath
1093 + self.pkg_tups_by_path[relpath] = thispo.pkgtup
1096 - def getNodes(self, relpath):
1097 - """Return base, filelist, and other nodes for file, if they exist
1099 - Returns a tuple of nodes, or None if not found
1100 + def getNodes(self, relpath):
1101 + """return a package object based on relative path of pkg
1103 - bnode = self.basenodes.get(relpath,None)
1106 - pkgid = self.pkg_ids.get(relpath,None)
1108 - print _("No pkgid found for: %s") % relpath
1110 - fnode = self.filesnodes.get(pkgid,None)
1113 - onode = self.othernodes.get(pkgid,None)
1116 - return bnode, fnode, onode
1118 - def freeNodes(self,relpath):
1120 - """Free up nodes corresponding to file, if possible"""
1121 - bnode = self.basenodes.get(relpath,None)
1123 - print "Missing node for %s" % relpath
1125 - bnode.unlinkNode()
1127 - del self.basenodes[relpath]
1128 - pkgid = self.pkg_ids.get(relpath,None)
1130 - print _("No pkgid found for: %s") % relpath
1132 - del self.pkg_ids[relpath]
1133 - dups = self.pkgrefs.get(pkgid)
1134 - dups.remove(relpath)
1138 - del self.pkgrefs[pkgid]
1139 - for nodes in self.filesnodes, self.othernodes:
1140 - node = nodes.get(pkgid)
1141 - if node is not None:
1145 + if relpath in self.pkg_tups_by_path:
1146 + pkgtup = self.pkg_tups_by_path[relpath]
1147 + return self._repo.sack.searchPkgTuple(pkgtup)[0]
1152 if __name__ == "__main__":
1154 @@ -209,9 +117,9 @@ if __name__ == "__main__":
1157 idx = MetadataIndex(cwd, opts)
1158 - for fn in idx.basenodes.keys():
1159 - a,b,c, = idx.getNodes(fn)
1164 + for fn in idx.pkg_tups_by_path:
1165 + po = idx.getNodes(fn)
1166 + print po.xml_dump_primary_metadata()
1167 + print po.xml_dump_filelists_metadata()
1168 + print po.xml_dump_other_metadata()
1170 diff --git a/createrepo/utils.py b/createrepo/utils.py
1171 index 995c3b9..b0d92ec 100644
1172 --- a/createrepo/utils.py
1173 +++ b/createrepo/utils.py
1174 @@ -23,6 +23,12 @@ import bz2
1176 from gzip import write32u, FNAME
1177 from yum import misc
1178 +_available_compression = ['gz', 'bz2']
1181 + _available_compression.append('xz')
1182 +except ImportError:
1185 def errorprint(stuff):
1186 print >> sys.stderr, stuff
1187 @@ -34,22 +40,14 @@ def _(args):
1189 class GzipFile(gzip.GzipFile):
1190 def _write_gzip_header(self):
1191 + # Generate a header that is easily reproduced with gzip -9 -n on
1192 + # an unix-like system
1193 self.fileobj.write('\037\213') # magic header
1194 self.fileobj.write('\010') # compression method
1195 - if hasattr(self, 'name'):
1196 - fname = self.name[:-3]
1198 - fname = self.filename[:-3]
1202 - self.fileobj.write(chr(flags))
1203 - write32u(self.fileobj, long(0))
1204 - self.fileobj.write('\002')
1205 - self.fileobj.write('\377')
1207 - self.fileobj.write(fname + '\000')
1209 + self.fileobj.write('\000') # flags
1210 + write32u(self.fileobj, long(0)) # timestamp
1211 + self.fileobj.write('\002') # max compression
1212 + self.fileobj.write('\003') # UNIX
1214 def _gzipOpen(filename, mode="rb", compresslevel=9):
1215 return GzipFile(filename, mode, compresslevel)
1216 @@ -69,6 +67,75 @@ def bzipFile(source, dest):
1220 +def xzFile(source, dest):
1221 + if not 'xz' in _available_compression:
1222 + raise MDError, "Cannot use xz for compression, library/module is not available"
1224 + s_fn = open(source, 'rb')
1225 + destination = lzma.LZMAFile(dest, 'w')
1228 + data = s_fn.read(1024000)
1230 + if not data: break
1231 + destination.write(data)
1233 + destination.close()
1236 +def gzFile(source, dest):
1238 + s_fn = open(source, 'rb')
1239 + destination = GzipFile(dest, 'w')
1242 + data = s_fn.read(1024000)
1244 + if not data: break
1245 + destination.write(data)
1247 + destination.close()
1252 + def __init__(self, **attr):
1253 + self.__dict__ = attr
1256 +def compressFile(source, dest, compress_type):
1257 + """Compress an existing file using any compression type from source to dest"""
1259 + if compress_type == 'xz':
1260 + xzFile(source, dest)
1261 + elif compress_type == 'bz2':
1262 + bzipFile(source, dest)
1263 + elif compress_type == 'gz':
1264 + gzFile(source, dest)
1266 + raise MDError, "Unknown compression type %s" % compress_type
1268 +def compressOpen(fn, mode='rb', compress_type=None):
1270 + if not compress_type:
1271 + # we are readonly and we don't give a compress_type - then guess based on the file extension
1272 + compress_type = fn.split('.')[-1]
1273 + if compress_type not in _available_compression:
1274 + compress_type = 'gz'
1276 + if compress_type == 'xz':
1277 + fh = lzma.LZMAFile(fn, mode)
1279 + fh = Duck(write=lambda s, write=fh.write: s != '' and write(s),
1282 + elif compress_type == 'bz2':
1283 + return bz2.BZ2File(fn, mode)
1284 + elif compress_type == 'gz':
1285 + return _gzipOpen(fn, mode)
1287 + raise MDError, "Unknown compression type %s" % compress_type
1289 def returnFD(filename):
1291 fdno = os.open(filename, os.O_RDONLY)
1292 @@ -124,15 +191,28 @@ def encodefiletypelist(filetypelist):
1295 def split_list_into_equal_chunks(seq, num_chunks):
1296 - avg = len(seq) / float(num_chunks)
1299 - while last < len(seq):
1300 - out.append(seq[int(last):int(last + avg)])
1303 + """it's used on sorted input which is then merged in order"""
1304 + out = [[] for i in range(num_chunks)]
1305 + for i, item in enumerate(seq):
1306 + out[i % num_chunks].append(item)
1309 +def num_cpus_online(unknown=1):
1310 + if not hasattr(os, "sysconf"):
1313 + if not os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
1316 + ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
1318 + if int(ncpus) > 0:
1326 class MDError(Exception):
1327 def __init__(self, value=None):
1328 diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
1329 index ac06196..f87ac6d 100644
1330 --- a/createrepo/yumbased.py
1331 +++ b/createrepo/yumbased.py
1337 + oumask = os.umask(0)
1340 +_b4rpm_oumask = _get_umask()
1344 @@ -86,6 +91,9 @@ class CreateRepoPackage(YumLocalPackage):
1345 csumo = os.fdopen(csumo, 'w', -1)
1346 csumo.write(checksum)
1348 + # tempfile forces 002 ... we want to undo that, so that users
1349 + # can share the cache. BZ 833350.
1350 + os.chmod(tmpfilename, 0666 ^ _b4rpm_oumask)
1351 os.rename(tmpfilename, csumfile)
1354 diff --git a/docs/createrepo.8 b/docs/createrepo.8
1355 index e3c4c3b..ff359de 100644
1356 --- a/docs/createrepo.8
1357 +++ b/docs/createrepo.8
1358 @@ -53,7 +53,8 @@ gullible).
1359 Don't generate repo metadata, if their timestamps are newer than its rpms.
1360 This option decreases the processing time drastically again, if you happen
1361 to run it on an unmodified repo, but it is (currently) mutual exclusive
1362 -with the --split option.
1363 +with the --split option. NOTE: This command will not notice when
1364 +packages have been removed from repo. Use --update to handle that.
1366 .IP "\fB\--split\fP"
1367 Run in split media mode. Rather than pass a single directory, take a set of
1368 @@ -104,7 +105,16 @@ Tells createrepo to generate deltarpms and the delta metadata
1369 paths to look for older pkgs to delta against. Can be specified multiple times
1370 .IP "\fB\--num-deltas\fP int"
1371 the number of older versions to make deltas against. Defaults to 1
1373 +.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST
1374 +output the paths to the pkgs actually read useful with --update
1375 +.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE
1376 +max size of an rpm that to run deltarpm against (in bytes)
1377 +.IP "\fB\--workers\fP WORKERS
1378 +number of workers to spawn to read rpms
1379 +.IP "\fB\--compress-type\fP
1380 +specify which compression method to use: compat (default),
1381 +xz (may not be available), gz, bz2.
1385 Here is an example of a repository with a groups file. Note that the
1386 diff --git a/genpkgmetadata.py b/genpkgmetadata.py
1387 index 8c98191..c46e441 100755
1388 --- a/genpkgmetadata.py
1389 +++ b/genpkgmetadata.py
1390 @@ -37,6 +37,12 @@ def parse_args(args, conf):
1391 Sanity check all the things being passed in.
1394 + def_workers = os.nice(0)
1395 + if def_workers > 0:
1396 + def_workers = 1 # We are niced, so just use a single worker.
1398 + def_workers = 0 # zoooom....
1400 _def = yum.misc._default_checksums[0]
1401 _avail = yum.misc._available_checksums
1402 parser = OptionParser(version = "createrepo %s" % createrepo.__version__)
1403 @@ -100,6 +106,8 @@ def parse_args(args, conf):
1404 parser.add_option("--simple-md-filenames", dest="simple_md_filenames",
1405 help="do not include the file's checksum in the filename, helps with proxies",
1406 default=False, action="store_true")
1407 + parser.add_option("--retain-old-md", default=0, type='int', dest='retain_old_md',
1408 + help="keep around the latest (by timestamp) N copies of the old repodata")
1409 parser.add_option("--distro", default=[], action="append",
1410 help="distro tag and optional cpeid: --distro" "'cpeid,textname'")
1411 parser.add_option("--content", default=[], dest='content_tags',
1412 @@ -119,10 +127,15 @@ def parse_args(args, conf):
1413 parser.add_option("--max-delta-rpm-size", default=100000000,
1414 dest='max_delta_rpm_size', type='int',
1415 help="max size of an rpm that to run deltarpm against (in bytes)")
1417 - parser.add_option("--workers", default=1,
1418 + parser.add_option("--workers", default=def_workers,
1419 dest='workers', type='int',
1420 help="number of workers to spawn to read rpms")
1421 + parser.add_option("--xz", default=False,
1422 + action="store_true",
1423 + help="use xz for repodata compression")
1424 + parser.add_option("--compress-type", default='compat', dest="compress_type",
1425 + help="which compression type to use")
1428 (opts, argsleft) = parser.parse_args(args)
1429 if len(argsleft) > 1 and not opts.split:
1430 @@ -138,6 +151,9 @@ def parse_args(args, conf):
1432 directories = argsleft
1434 + if opts.workers >= 128:
1435 + errorprint(_('Warning: More than 128 workers is a lot. Limiting.'))
1436 + opts.workers = 128
1437 if opts.sumtype == 'sha1':
1438 errorprint(_('Warning: It is more compatible to use sha instead of sha1'))
1440 @@ -155,6 +171,11 @@ def parse_args(args, conf):
1443 opts.database = False
1445 + # xz is just a shorthand for compress_type
1446 + if opts.xz and opts.compress_type == 'compat':
1447 + opts.compress_type='xz'
1450 # let's switch over to using the conf object - put all the opts into it
1451 for opt in parser.option_list:
1452 @@ -240,6 +261,7 @@ def main(args):
1453 if mdgen.checkTimeStamps():
1454 if mdgen.conf.verbose:
1455 print _('repo is up to date')
1456 + mdgen._cleanup_tmp_repodata_dir()
1460 diff --git a/mergerepo.py b/mergerepo.py
1461 index 05e5f5e..80cb1a8 100755
1467 import createrepo.merge
1468 +from createrepo.utils import MDError
1469 from optparse import OptionParser
1472 @@ -47,6 +48,9 @@ def parse_args(args):
1473 help="Do not merge group(comps) metadata")
1474 parser.add_option("", "--noupdateinfo", default=False, action="store_true",
1475 help="Do not merge updateinfo metadata")
1476 + parser.add_option("--compress-type", default=None, dest="compress_type",
1477 + help="which compression type to use")
1479 (opts, argsleft) = parser.parse_args(args)
1481 if len(opts.repos) < 2:
1482 @@ -77,9 +81,14 @@ def main(args):
1483 rmbase.groups = False
1484 if opts.noupdateinfo:
1485 rmbase.updateinfo = False
1487 - rmbase.merge_repos()
1488 - rmbase.write_metadata()
1490 + if opts.compress_type:
1491 + rmbase.mdconf.compress_type = opts.compress_type
1493 + rmbase.merge_repos()
1494 + rmbase.write_metadata()
1495 + except MDError, e:
1496 + print >> sys.stderr, "Could not merge repos: %s" % e
1499 if __name__ == "__main__":
1501 diff --git a/modifyrepo.py b/modifyrepo.py
1502 index 17094a4..bf1eec0 100755
1507 -# This tools is used to insert arbitrary metadata into an RPM repository.
1508 +# This tool is used to manipulate arbitrary metadata in a RPM repository.
1510 # ./modifyrepo.py updateinfo.xml myrepo/repodata
1512 +# ./modifyrepo.py --remove updateinfo.xml myrepo/repodata
1514 # >>> from modifyrepo import RepoMetadata
1515 # >>> repomd = RepoMetadata('myrepo/repodata')
1516 # >>> repomd.add('updateinfo.xml')
1518 +# >>> repomd.remove('updateinfo.xml')
1520 # This program is free software; you can redistribute it and/or modify
1521 # it under the terms of the GNU General Public License as published by
1523 # (C) Copyright 2006 Red Hat, Inc.
1524 # Luke Macken <lmacken@redhat.com>
1525 # modified by Seth Vidal 2008
1526 +# modified by Daniel Mach 2011
1530 from createrepo import __version__
1531 -from createrepo.utils import checksum_and_rename, GzipFile, MDError
1532 +from createrepo.utils import checksum_and_rename, compressOpen, MDError
1533 +from createrepo.utils import _available_compression
1534 from yum.misc import checksum
1536 from yum.repoMDObject import RepoMD, RepoMDError, RepoData
1537 @@ -39,6 +45,8 @@ class RepoMetadata:
1538 self.repodir = os.path.abspath(repo)
1539 self.repomdxml = os.path.join(self.repodir, 'repomd.xml')
1540 self.checksum_type = 'sha256'
1541 + self.compress = False
1542 + self.compress_type = _available_compression[-1] # best available
1544 if not os.path.exists(self.repomdxml):
1545 raise MDError, '%s not found' % self.repomdxml
1546 @@ -49,6 +57,35 @@ class RepoMetadata:
1547 except RepoMDError, e:
1548 raise MDError, 'Could not parse %s' % self.repomdxml
1550 + def _get_mdtype(self, mdname, mdtype=None):
1551 + """ Get mdtype from existing mdtype or from a mdname. """
1554 + return mdname.split('.')[0]
1556 + def _print_repodata(self, repodata):
1557 + """ Print repodata details. """
1558 + print " type =", repodata.type
1559 + print " location =", repodata.location[1]
1560 + print " checksum =", repodata.checksum[1]
1561 + print " timestamp =", repodata.timestamp
1562 + print " open-checksum =", repodata.openchecksum[1]
1564 + def _write_repomd(self):
1565 + """ Write the updated repomd.xml. """
1566 + outmd = file(self.repomdxml, 'w')
1567 + outmd.write(self.repoobj.dump_xml())
1569 + print "Wrote:", self.repomdxml
1571 + def _remove_repodata_file(self, repodata):
1572 + """ Remove a file specified in repodata location """
1574 + os.remove(repodata.location[1])
1575 + except OSError, ex:
1577 + # continue on a missing file
1578 + raise MDError("could not remove file %s" % repodata.location[1])
1580 def add(self, metadata, mdtype=None):
1581 """ Insert arbitrary metadata into this repository.
1582 @@ -63,8 +100,8 @@ class RepoMetadata:
1583 mdname = 'updateinfo.xml'
1584 elif isinstance(metadata, str):
1585 if os.path.exists(metadata):
1586 - if metadata.endswith('.gz'):
1587 - oldmd = GzipFile(filename=metadata, mode='rb')
1588 + if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'):
1589 + oldmd = compressOpen(metadata, mode='rb')
1591 oldmd = file(metadata, 'r')
1593 @@ -75,14 +112,19 @@ class RepoMetadata:
1595 raise MDError, 'invalid metadata type'
1597 + do_compress = False
1598 ## Compress the metadata and move it into the repodata
1599 - if not mdname.endswith('.gz'):
1602 - mdtype = mdname.split('.')[0]
1604 + if self.compress or not mdname.split('.')[-1] in ('gz', 'bz2', 'xz'):
1605 + do_compress = True
1606 + mdname += '.' + self.compress_type
1607 + mdtype = self._get_mdtype(mdname, mdtype)
1609 destmd = os.path.join(self.repodir, mdname)
1610 - newmd = GzipFile(filename=destmd, mode='wb')
1612 + newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type)
1614 + newmd = open(destmd, 'wb')
1618 print "Wrote:", destmd
1619 @@ -91,11 +133,8 @@ class RepoMetadata:
1620 csum, destmd = checksum_and_rename(destmd, self.checksum_type)
1621 base_destmd = os.path.basename(destmd)
1624 - ## Remove any stale metadata
1625 - if mdtype in self.repoobj.repoData:
1626 - del self.repoobj.repoData[mdtype]
1628 + # Remove any stale metadata
1629 + old_rd = self.repoobj.repoData.pop(mdtype, None)
1632 new_rd.type = mdtype
1633 @@ -105,18 +144,28 @@ class RepoMetadata:
1634 new_rd.size = str(os.stat(destmd).st_size)
1635 new_rd.timestamp = str(os.stat(destmd).st_mtime)
1636 self.repoobj.repoData[new_rd.type] = new_rd
1638 - print " type =", new_rd.type
1639 - print " location =", new_rd.location[1]
1640 - print " checksum =", new_rd.checksum[1]
1641 - print " timestamp =", new_rd.timestamp
1642 - print " open-checksum =", new_rd.openchecksum[1]
1644 - ## Write the updated repomd.xml
1645 - outmd = file(self.repomdxml, 'w')
1646 - outmd.write(self.repoobj.dump_xml())
1648 - print "Wrote:", self.repomdxml
1649 + self._print_repodata(new_rd)
1650 + self._write_repomd()
1652 + if old_rd is not None and old_rd.location[1] != new_rd.location[1]:
1653 + # remove the old file when overwriting metadata
1654 + # with the same mdtype but different location
1655 + self._remove_repodata_file(old_rd)
1657 + def remove(self, metadata, mdtype=None):
1658 + """ Remove metadata from this repository. """
1660 + mdtype = self._get_mdtype(mdname, mdtype)
1662 + old_rd = self.repoobj.repoData.pop(mdtype, None)
1663 + if old_rd is None:
1664 + print "Metadata not found: %s" % mdtype
1667 + self._remove_repodata_file(old_rd)
1669 + self._print_repodata(old_rd)
1670 + self._write_repomd()
1674 @@ -124,7 +173,13 @@ def main(args):
1676 parser.add_option("--mdtype", dest='mdtype',
1677 help="specific datatype of the metadata, will be derived from the filename if not specified")
1678 - parser.usage = "modifyrepo [options] <input_metadata> <output repodata>"
1679 + parser.add_option("--remove", action="store_true",
1680 + help="remove specified file from repodata")
1681 + parser.add_option("--compress", action="store_true", default=False,
1682 + help="compress the new repodata before adding it to the repo")
1683 + parser.add_option("--compress-type", dest='compress_type', default='gz',
1684 + help="compression format to use")
1685 + parser.usage = "modifyrepo [options] [--remove] <input_metadata> <output repodata>"
1687 (opts, argsleft) = parser.parse_args(args)
1688 if len(argsleft) != 2:
1689 @@ -137,11 +192,28 @@ def main(args):
1691 print "Could not access repository: %s" % str(e)
1695 + repomd.compress = opts.compress
1696 + if opts.compress_type in _available_compression:
1697 + repomd.compress_type = opts.compress_type
1702 + repomd.remove(metadata)
1703 + except MDError, ex:
1704 + print "Could not remove metadata: %s" % (metadata, str(ex))
1710 repomd.add(metadata, mdtype=opts.mdtype)
1712 print "Could not add metadata from file %s: %s" % (metadata, str(e))
1716 if __name__ == '__main__':
1717 ret = main(sys.argv[1:])
1718 diff --git a/worker.py b/worker.py
1719 index eb35ef7..fe6758f 100755
1722 @@ -5,6 +5,7 @@ import yum
1727 from optparse import OptionParser
1730 @@ -23,6 +24,8 @@ def main(args):
1731 parser = OptionParser()
1732 parser.add_option('--tmpmdpath', default=None,
1733 help="path where the outputs should be dumped for this worker")
1734 + parser.add_option('--pkglist', default=None,
1735 + help="file to read the pkglist from in lieu of all of them on the cli")
1736 parser.add_option("--pkgoptions", default=[], action='append',
1737 help="pkgoptions in the format of key=value")
1738 parser.add_option("--quiet", default=False, action='store_true',
1739 @@ -36,10 +39,6 @@ def main(args):
1740 opts, pkgs = parser.parse_args(args)
1741 external_data = {'_packagenumber': 1}
1743 - if not opts.tmpmdpath:
1744 - print >> sys.stderr, "tmpmdpath required for destination files"
1748 for strs in opts.pkgoptions:
1749 k,v = strs.split('=')
1750 @@ -64,15 +63,34 @@ def main(args):
1752 reldir = external_data['_reldir']
1753 ts = rpmUtils.transaction.initReadOnlyTransaction()
1754 - pri = open(opts.tmpmdpath + '/primary.xml' , 'w')
1755 - fl = open(opts.tmpmdpath + '/filelists.xml' , 'w')
1756 - other = open(opts.tmpmdpath + '/other.xml' , 'w')
1759 + if opts.tmpmdpath:
1760 + files = [open(opts.tmpmdpath + '/%s.xml' % i, 'w')
1761 + for i in ('primary', 'filelists', 'other')]
1763 + for fh, buf in zip(files, xml):
1767 + buf = ' '.join(str(len(i)) for i in xml)
1768 + sys.stdout.write('*** %s\n' % buf)
1770 + sys.stdout.write(buf)
1773 + for line in open(opts.pkglist,'r').readlines():
1774 + line = line.strip()
1775 + if re.match('^\s*\#.*', line) or re.match('^\s*$', line):
1779 + clog_limit=globalopts.get('clog_limit', None)
1780 + if clog_limit is not None:
1781 + clog_limit = int(clog_limit)
1782 for pkgfile in pkgs:
1783 pkgpath = reldir + '/' + pkgfile
1784 if not os.path.exists(pkgpath):
1785 print >> sys.stderr, "File not found: %s" % pkgpath
1790 @@ -80,20 +98,17 @@ def main(args):
1791 print "reading %s" % (pkgfile)
1793 pkg = createrepo.yumbased.CreateRepoPackage(ts, package=pkgpath,
1794 - external_data=external_data)
1795 - pri.write(pkg.xml_dump_primary_metadata())
1796 - fl.write(pkg.xml_dump_filelists_metadata())
1797 - other.write(pkg.xml_dump_other_metadata(clog_limit=
1798 - globalopts.get('clog_limit', None)))
1799 + sumtype=globalopts.get('sumtype', None),
1800 + external_data=external_data)
1801 + output(pkg.xml_dump_primary_metadata(),
1802 + pkg.xml_dump_filelists_metadata(),
1803 + pkg.xml_dump_other_metadata(clog_limit=clog_limit))
1804 except yum.Errors.YumBaseError, e:
1805 print >> sys.stderr, "Error: %s" % e
1809 external_data['_packagenumber']+=1
1815 if __name__ == "__main__":