From 491749563dcd0b55dca586c2278bc2975e134e29 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Elan=20Ruusam=C3=A4e?= Date: Mon, 21 Oct 2013 11:18:36 +0300 Subject: [PATCH] up to 0.10 --- createrepo-head.patch | 1884 ----------------------------------------- createrepo.spec | 8 +- 2 files changed, 3 insertions(+), 1889 deletions(-) delete mode 100644 createrepo-head.patch diff --git a/createrepo-head.patch b/createrepo-head.patch deleted file mode 100644 index c43f145..0000000 --- a/createrepo-head.patch +++ /dev/null @@ -1,1884 +0,0 @@ -diff --git a/Makefile b/Makefile -index 60bb9db..0b5738b 100644 ---- a/Makefile -+++ b/Makefile -@@ -1,4 +1,5 @@ - PKGNAME = createrepo -+ALIASES = mergerepo modifyrepo genpkgmetadata.py mergerepo.py modifyrepo.py - VERSION=$(shell awk '/Version:/ { print $$2 }' ${PKGNAME}.spec) - RELEASE=$(shell awk '/Release:/ { print $$2 }' ${PKGNAME}.spec) - CVSTAG=createrepo-$(subst .,_,$(VERSION)-$(RELEASE)) -@@ -26,6 +27,8 @@ docdir = - includedir = ${prefix}/include - oldincludedir = /usr/include - mandir = ${prefix}/share/man -+compdir = $(shell pkg-config --variable=completionsdir bash-completion) -+compdir := $(or $(compdir), "/etc/bash_completion.d") - - pkgdatadir = $(datadir)/$(PKGNAME) - pkglibdir = $(libdir)/$(PKGNAME) -@@ -33,7 +36,7 @@ pkgincludedir = $(includedir)/$(PKGNAME) - top_builddir = - - # all dirs --DIRS = $(DESTDIR)$(bindir) $(DESTDIR)$(sysconfdir)/bash_completion.d \ -+DIRS = $(DESTDIR)$(bindir) $(DESTDIR)$(compdir) \ - $(DESTDIR)$(pkgdatadir) $(DESTDIR)$(mandir) - - -@@ -65,7 +68,8 @@ check: - - install: all installdirs - $(INSTALL_MODULES) $(srcdir)/$(MODULES) $(DESTDIR)$(pkgdatadir) -- $(INSTALL_DATA) $(PKGNAME).bash $(DESTDIR)$(sysconfdir)/bash_completion.d -+ $(INSTALL_DATA) $(PKGNAME).bash $(DESTDIR)$(compdir)/$(PKGNAME) -+ (cd $(DESTDIR)$(compdir); for n in $(ALIASES); do ln -s $(PKGNAME) $$n; done) - for subdir in $(SUBDIRS) ; do \ - $(MAKE) -C $$subdir install VERSION=$(VERSION) PKGNAME=$(PKGNAME); \ - done -diff --git a/createrepo.bash b/createrepo.bash -index 54ac8b2..f5a8bb7 100644 ---- a/createrepo.bash -+++ b/createrepo.bash -@@ -1,11 +1,17 @@ - # bash completion for createrepo and friends - -+_cr_compress_type() -+{ -+ COMPREPLY=( $( compgen -W "$( ${1:-createrepo} --compress-type=FOO / 2>&1 \ -+ | sed -ne 's/,/ /g' -ne 's/.*[Cc]ompression.*://p' )" -- "$2" ) ) -+} -+ - _cr_createrepo() - { - COMPREPLY=() - - case $3 in -- --version|-h|--help|-u|--baseurl|--distro|--content|--repo|--workers|\ -+ --version|-h|--help|-u|--baseurl|--distro|--content|--repo|\ - --revision|-x|--excludes|--changelog-limit|--max-delta-rpm-size) - return 0 - ;; -@@ -30,10 +36,24 @@ _cr_createrepo() - COMPREPLY=( $( compgen -f -o plusdirs -X '!*.rpm' -- "$2" ) ) - return 0 - ;; -+ --retain-old-md) -+ COMPREPLY=( $( compgen -W '0 1 2 3 4 5 6 7 8 9' -- "$2" ) ) -+ return 0 -+ ;; - --num-deltas) - COMPREPLY=( $( compgen -W '1 2 3 4 5 6 7 8 9' -- "$2" ) ) - return 0 - ;; -+ --workers) -+ local min=2 max=$( getconf _NPROCESSORS_ONLN 2>/dev/null ) -+ [[ -z $max || $max -lt $min ]] && max=$min -+ COMPREPLY=( $( compgen -W "{1..$max}" -- "$2" ) ) -+ return 0 -+ ;; -+ --compress-type) -+ _cr_compress_type "$1" "$2" -+ return 0 -+ ;; - esac - - if [[ $2 == -* ]] ; then -@@ -42,9 +62,9 @@ _cr_createrepo() - --cachedir --checkts --no-database --update --update-md-path - --skip-stat --split --pkglist --includepkg --outputdir - --skip-symlinks --changelog-limit --unique-md-filenames -- --simple-md-filenames --distro --content --repo --revision --deltas -- --oldpackagedirs 
--num-deltas --read-pkgs-list -- --max-delta-rpm-size --workers' -- "$2" ) ) -+ --simple-md-filenames --retain-old-md --distro --content --repo -+ --revision --deltas --oldpackagedirs --num-deltas --read-pkgs-list -+ --max-delta-rpm-size --workers --compress-type' -- "$2" ) ) - else - COMPREPLY=( $( compgen -d -- "$2" ) ) - fi -@@ -63,10 +83,14 @@ _cr_mergerepo() - COMPREPLY=( $( compgen -d -- "$2" ) ) - return 0 - ;; -+ --compress-type) -+ _cr_compress_type "" "$2" -+ return 0 -+ ;; - esac - - COMPREPLY=( $( compgen -W '--version --help --repo --archlist --no-database -- --outputdir --nogroups --noupdateinfo' -- "$2" ) ) -+ --outputdir --nogroups --noupdateinfo --compress-type' -- "$2" ) ) - } && - complete -F _cr_mergerepo -o filenames mergerepo mergerepo.py - -@@ -78,17 +102,22 @@ _cr_modifyrepo() - --version|-h|--help|--mdtype) - return 0 - ;; -+ --compress-type) -+ _cr_compress_type "" "$2" -+ return 0 -+ ;; - esac - - if [[ $2 == -* ]] ; then -- COMPREPLY=( $( compgen -W '--version --help --mdtype' -- "$2" ) ) -+ COMPREPLY=( $( compgen -W '--version --help --mdtype --remove -+ --compress --compress-type' -- "$2" ) ) - return 0 - fi - - local i argnum=1 - for (( i=1; i < ${#COMP_WORDS[@]}-1; i++ )) ; do - if [[ ${COMP_WORDS[i]} != -* && -- ${COMP_WORDS[i-1]} != @(=|--mdtype) ]]; then -+ ${COMP_WORDS[i-1]} != @(=|--@(md|compress-)type) ]]; then - argnum=$(( argnum+1 )) - fi - done -diff --git a/createrepo.spec b/createrepo.spec -index 1e491cd..cdd8b6f 100644 ---- a/createrepo.spec -+++ b/createrepo.spec -@@ -1,4 +1,10 @@ - %{!?python_sitelib: %define python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")} -+# disable broken /usr/lib/rpm/brp-python-bytecompile -+%define __os_install_post %{nil} -+%define compdir %(pkg-config --variable=completionsdir bash-completion) -+%if "%{compdir}" == "" -+%define compdir "/etc/bash_completion.d" -+%endif - - Summary: Creates a common metadata repository - Name: createrepo -@@ -10,8 +16,9 @@ Source: %{name}-%{version}.tar.gz - URL: http://createrepo.baseurl.org/ - BuildRoot: %{_tmppath}/%{name}-%{version}root - BuildArchitectures: noarch -+BuildRequires: bash-completion - Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python --Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm -+Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm, pyliblzma - - %description - This utility will generate a common metadata repository from a directory of -@@ -32,7 +39,7 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install - %defattr(-, root, root) - %dir %{_datadir}/%{name} - %doc ChangeLog README COPYING COPYING.lib --%{_sysconfdir}/bash_completion.d/ -+%(dirname %{compdir}) - %{_datadir}/%{name}/* - %{_bindir}/%{name} - %{_bindir}/modifyrepo -@@ -43,6 +50,9 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install - %{python_sitelib}/createrepo - - %changelog -+* Fri Sep 9 2011 Seth Vidal -+- add lzma dep -+ - * Wed Jan 26 2011 Seth Vidal - - bump to 0.9.9 - - add worker.py -diff --git a/createrepo/__init__.py b/createrepo/__init__.py -index 8f2538e..1b18a9f 100644 ---- a/createrepo/__init__.py -+++ b/createrepo/__init__.py -@@ -26,15 +26,16 @@ import tempfile - import stat - import fcntl - import subprocess -+from select import select - --from yum import misc, Errors, to_unicode --from yum.repoMDObject import RepoMD, RepoMDError, RepoData -+from yum import misc, Errors -+from yum.repoMDObject import RepoMD, RepoData - from yum.sqlutils import executeSQL - from 
yum.packageSack import MetaSack --from yum.packages import YumAvailablePackage, YumLocalPackage -+from yum.packages import YumAvailablePackage - - import rpmUtils.transaction --from utils import _, errorprint, MDError -+from utils import _, errorprint, MDError, lzma, _available_compression - import readMetadata - try: - import sqlite3 as sqlite -@@ -46,8 +47,9 @@ try: - except ImportError: - pass - --from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \ -+from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \ - checksum_and_rename, split_list_into_equal_chunks -+from utils import num_cpus_online - import deltarpms - - __version__ = '0.9.9' -@@ -74,7 +76,7 @@ class MetaDataConfig(object): - self.deltadir = None - self.delta_relative = 'drpms/' - self.oldpackage_paths = [] # where to look for the old packages - -- self.deltafile = 'prestodelta.xml.gz' -+ self.deltafile = 'prestodelta.xml' - self.num_deltas = 1 # number of older versions to delta (max) - self.max_delta_rpm_size = 100000000 - self.update_md_path = None -@@ -86,9 +88,9 @@ class MetaDataConfig(object): - self.skip_symlinks = False - self.pkglist = [] - self.database_only = False -- self.primaryfile = 'primary.xml.gz' -- self.filelistsfile = 'filelists.xml.gz' -- self.otherfile = 'other.xml.gz' -+ self.primaryfile = 'primary.xml' -+ self.filelistsfile = 'filelists.xml' -+ self.otherfile = 'other.xml' - self.repomdfile = 'repomd.xml' - self.tempdir = '.repodata' - self.finaldir = 'repodata' -@@ -108,8 +110,10 @@ class MetaDataConfig(object): - self.collapse_glibc_requires = True - self.workers = 1 # number of workers to fork off to grab metadata from the pkgs - self.worker_cmd = '/usr/share/createrepo/worker.py' -- - #self.worker_cmd = './worker.py' # helpful when testing -+ self.retain_old_md = 0 -+ self.compress_type = 'compat' -+ - - class SimpleMDCallBack(object): - def errorlog(self, thing): -@@ -141,10 +145,23 @@ class MetaDataGenerator: - self.files = [] - self.rpmlib_reqs = {} - self.read_pkgs = [] -+ self.compat_compress = False - - if not self.conf.directory and not self.conf.directories: - raise MDError, "No directory given on which to run." -- -+ -+ if self.conf.compress_type == 'compat': -+ self.compat_compress = True -+ self.conf.compress_type = None -+ -+ if not self.conf.compress_type: -+ self.conf.compress_type = 'gz' -+ -+ if self.conf.compress_type not in utils._available_compression: -+ raise MDError, "Compression %s not available: Please choose from: %s" \ -+ % (self.conf.compress_type, ', '.join(utils._available_compression)) -+ -+ - if not self.conf.directories: # just makes things easier later - self.conf.directories = [self.conf.directory] - if not self.conf.directory: # ensure we have both in the config object -@@ -290,14 +307,13 @@ class MetaDataGenerator: - - def extension_visitor(filelist, dirname, names): - for fn in names: -+ fn = os.path.join(dirname, fn) - if os.path.isdir(fn): - continue - if self.conf.skip_symlinks and os.path.islink(fn): - continue - elif fn[-extlen:].lower() == '%s' % (ext): -- relativepath = dirname.replace(startdir, "", 1) -- relativepath = relativepath.lstrip("/") -- filelist.append(os.path.join(relativepath, fn)) -+ filelist.append(fn[len(startdir):]) - - filelist = [] - startdir = directory + '/' -@@ -311,7 +327,7 @@ class MetaDataGenerator: - def checkTimeStamps(self): - """check the timestamp of our target dir. 
If it is not newer than - the repodata return False, else True""" -- if self.conf.checkts: -+ if self.conf.checkts and self.conf.mdtimestamp: - dn = os.path.join(self.conf.basedir, self.conf.directory) - files = self.getFileList(dn, '.rpm') - files = self.trimRpms(files) -@@ -410,9 +426,11 @@ class MetaDataGenerator: - - def _setupPrimary(self): - # setup the primary metadata file -+ # FIXME - make this be conf.compress_type once y-m-p is fixed -+ fpz = self.conf.primaryfile + '.' + 'gz' - primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, -- self.conf.primaryfile) -- fo = _gzipOpen(primaryfilepath, 'w') -+ fpz) -+ fo = compressOpen(primaryfilepath, 'w', 'gz') - fo.write('\n') - fo.write('' % -@@ -421,9 +439,11 @@ class MetaDataGenerator: - - def _setupFilelists(self): - # setup the filelist file -+ # FIXME - make this be conf.compress_type once y-m-p is fixed -+ fpz = self.conf.filelistsfile + '.' + 'gz' - filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir, -- self.conf.filelistsfile) -- fo = _gzipOpen(filelistpath, 'w') -+ fpz) -+ fo = compressOpen(filelistpath, 'w', 'gz') - fo.write('\n') - fo.write('' % self.pkgcount) -@@ -431,9 +451,11 @@ class MetaDataGenerator: - - def _setupOther(self): - # setup the other file -+ # FIXME - make this be conf.compress_type once y-m-p is fixed -+ fpz = self.conf.otherfile + '.' + 'gz' - otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, -- self.conf.otherfile) -- fo = _gzipOpen(otherfilepath, 'w') -+ fpz) -+ fo = compressOpen(otherfilepath, 'w', 'gz') - fo.write('\n') - fo.write('' % -@@ -442,9 +464,10 @@ class MetaDataGenerator: - - def _setupDelta(self): - # setup the other file -+ fpz = self.conf.deltafile + '.' + self.conf.compress_type - deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir, -- self.conf.deltafile) -- fo = _gzipOpen(deltafilepath, 'w') -+ fpz) -+ fo = compressOpen(deltafilepath, 'w', self.conf.compress_type) - fo.write('\n') - fo.write('\n') - return fo -@@ -520,6 +543,7 @@ class MetaDataGenerator: - # go on their merry way - - newpkgs = [] -+ keptpkgs = [] - if self.conf.update: - # if we're in --update mode then only act on the new/changed pkgs - for pkg in pkglist: -@@ -530,39 +554,13 @@ class MetaDataGenerator: - old_pkg = pkg - if pkg.find("://") != -1: - old_pkg = os.path.basename(pkg) -- nodes = self.oldData.getNodes(old_pkg) -- if nodes is not None: # we have a match in the old metadata -+ old_po = self.oldData.getNodes(old_pkg) -+ if old_po: # we have a match in the old metadata - if self.conf.verbose: - self.callback.log(_("Using data from old metadata for %s") - % pkg) -- (primarynode, filenode, othernode) = nodes -- -- for node, outfile in ((primarynode, self.primaryfile), -- (filenode, self.flfile), -- (othernode, self.otherfile)): -- if node is None: -- break -- -- if self.conf.baseurl: -- anode = node.children -- while anode is not None: -- if anode.type != "element": -- anode = anode.next -- continue -- if anode.name == "location": -- anode.setProp('xml:base', self.conf.baseurl) -- anode = anode.next -- -- output = node.serialize('UTF-8', self.conf.pretty) -- if output: -- outfile.write(output) -- else: -- if self.conf.verbose: -- self.callback.log(_("empty serialize on write to" \ -- "%s in %s") % (outfile, pkg)) -- outfile.write('\n') -- -- self.oldData.freeNodes(pkg) -+ keptpkgs.append((pkg, old_po)) -+ - #FIXME - if we're in update and we have deltas enabled - # check the presto data for this pkg and write its info back out - # to our 
deltafile -@@ -584,32 +582,45 @@ class MetaDataGenerator: - po = None - if isinstance(pkg, YumAvailablePackage): - po = pkg -- self.read_pkgs.append(po.localpath) -+ self.read_pkgs.append(po.localPkg()) - - # if we're dealing with remote pkgs - pitch it over to doing - # them one at a time, for now. - elif pkg.find('://') != -1: -- po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) -+ po = self.read_in_package(pkg, pkgpath=pkgpath, reldir=reldir) - self.read_pkgs.append(pkg) - - if po: -- self.primaryfile.write(po.xml_dump_primary_metadata()) -- self.flfile.write(po.xml_dump_filelists_metadata()) -- self.otherfile.write(po.xml_dump_other_metadata( -- clog_limit=self.conf.changelog_limit)) -+ keptpkgs.append((pkg, po)) - continue - - pkgfiles.append(pkg) -- -- -+ -+ keptpkgs.sort(reverse=True) -+ # keptkgs is a list of (filename, po), pkgfiles is a list if filenames. -+ # Need to write them in sorted(filename) order. We loop over pkgfiles, -+ # inserting keptpkgs in right spots (using the upto argument). -+ def save_keptpkgs(upto): -+ while keptpkgs and (upto is None or keptpkgs[-1][0] < upto): -+ filename, po = keptpkgs.pop() -+ # reset baseurl in the old pkg -+ po.basepath = self.conf.baseurl -+ self.primaryfile.write(po.xml_dump_primary_metadata()) -+ self.flfile.write(po.xml_dump_filelists_metadata()) -+ self.otherfile.write(po.xml_dump_other_metadata( -+ clog_limit=self.conf.changelog_limit)) -+ - if pkgfiles: - # divide that list by the number of workers and fork off that many - # workers to tmpdirs - # waitfor the workers to finish and as each one comes in - # open the files they created and write them out to our metadata - # add up the total pkg counts and return that value -- worker_tmp_path = tempfile.mkdtemp() -- worker_chunks = utils.split_list_into_equal_chunks(pkgfiles, self.conf.workers) -+ self._worker_tmp_path = tempfile.mkdtemp() # setting this in the base object so we can clean it up later -+ if self.conf.workers < 1: -+ self.conf.workers = num_cpus_online() -+ pkgfiles.sort() -+ worker_chunks = split_list_into_equal_chunks(pkgfiles, self.conf.workers) - worker_cmd_dict = {} - worker_jobs = {} - base_worker_cmdline = [self.conf.worker_cmd, -@@ -617,7 +628,8 @@ class MetaDataGenerator: - '--pkgoptions=_collapse_libc_requires=%s' % self.conf.collapse_glibc_requires, - '--pkgoptions=_cachedir=%s' % self.conf.cachedir, - '--pkgoptions=_baseurl=%s' % self.conf.baseurl, -- '--globalopts=clog_limit=%s' % self.conf.changelog_limit,] -+ '--globalopts=clog_limit=%s' % self.conf.changelog_limit, -+ '--globalopts=sumtype=%s' % self.conf.sumtype, ] - - if self.conf.quiet: - base_worker_cmdline.append('--quiet') -@@ -626,15 +638,14 @@ class MetaDataGenerator: - base_worker_cmdline.append('--verbose') - - for worker_num in range(self.conf.workers): -- # make the worker directory -+ pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num -+ f = open(pkl, 'w') -+ f.write('\n'.join(worker_chunks[worker_num])) -+ f.close() -+ - workercmdline = [] - workercmdline.extend(base_worker_cmdline) -- thisdir = worker_tmp_path + '/' + str(worker_num) -- if checkAndMakeDir(thisdir): -- workercmdline.append('--tmpmdpath=%s' % thisdir) -- else: -- raise MDError, "Unable to create worker path: %s" % thisdir -- workercmdline.extend(worker_chunks[worker_num]) -+ workercmdline.append('--pkglist=%s/pkglist-%s' % (self._worker_tmp_path, worker_num)) - worker_cmd_dict[worker_num] = workercmdline - - -@@ -647,49 +658,60 @@ class MetaDataGenerator: - stderr=subprocess.PIPE) - worker_jobs[num] = 
job - -- gimmebreak = 0 -- while gimmebreak != len(worker_jobs.keys()): -- gimmebreak = 0 -- for (num,job) in worker_jobs.items(): -- if job.poll() is not None: -- gimmebreak+=1 -- line = job.stdout.readline() -- if line: -+ files = self.primaryfile, self.flfile, self.otherfile -+ def log_messages(num): -+ job = worker_jobs[num] -+ while True: -+ # check stdout and stderr -+ for stream in select((job.stdout, job.stderr), (), ())[0]: -+ line = stream.readline() -+ if line: break -+ else: -+ return # EOF, EOF -+ if stream is job.stdout: -+ if line.startswith('*** '): -+ # get data, save to local files -+ for out, size in zip(files, line[4:].split()): -+ out.write(stream.read(int(size))) -+ return - self.callback.log('Worker %s: %s' % (num, line.rstrip())) -- line = job.stderr.readline() -- if line: -+ else: - self.callback.errorlog('Worker %s: %s' % (num, line.rstrip())) -+ -+ for i, pkg in enumerate(pkgfiles): -+ # insert cached packages -+ save_keptpkgs(pkg) -+ -+ # save output to local files -+ log_messages(i % self.conf.workers) -+ -+ for (num, job) in worker_jobs.items(): -+ # process remaining messages on stderr -+ log_messages(num) -+ -+ if job.wait() != 0: -+ msg = "Worker exited with non-zero value: %s. Fatal." % job.returncode -+ self.callback.errorlog(msg) -+ raise MDError, msg - -- - if not self.conf.quiet: - self.callback.log("Workers Finished") -- # finished with workers -- # go to their dirs and add the contents -- if not self.conf.quiet: -- self.callback.log("Gathering worker results") -- for num in range(self.conf.workers): -- for (fn, fo) in (('primary.xml', self.primaryfile), -- ('filelists.xml', self.flfile), -- ('other.xml', self.otherfile)): -- fnpath = worker_tmp_path + '/' + str(num) + '/' + fn -- if os.path.exists(fnpath): -- fo.write(open(fnpath, 'r').read()) -- - - for pkgfile in pkgfiles: - if self.conf.deltas: -- po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) -- self._do_delta_rpm_package(po) -+ try: -+ po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir) -+ self._do_delta_rpm_package(po) -+ except MDError, e: -+ errorprint(e) -+ continue - self.read_pkgs.append(pkgfile) - -+ save_keptpkgs(None) # append anything left - return self.current_pkg - - - def closeMetadataDocs(self): -- if not self.conf.quiet: -- self.callback.log('') -- -- - # save them up to the tmp locations: - if not self.conf.quiet: - self.callback.log(_('Saving Primary metadata')) -@@ -784,7 +806,6 @@ class MetaDataGenerator: - return self._old_package_dict - - self._old_package_dict = {} -- opl = [] - for d in self.conf.oldpackage_paths: - for f in self.getFileList(d, '.rpm'): - fp = d + '/' + f -@@ -833,7 +854,7 @@ class MetaDataGenerator: - return ' '.join(results) - - def _createRepoDataObject(self, mdfile, mdtype, compress=True, -- compress_type='gzip', attribs={}): -+ compress_type=None, attribs={}): - """return random metadata as RepoData object to be added to RepoMD - mdfile = complete path to file - mdtype = the metadata type to use -@@ -843,15 +864,13 @@ class MetaDataGenerator: - sfile = os.path.basename(mdfile) - fo = open(mdfile, 'r') - outdir = os.path.join(self.conf.outputdir, self.conf.tempdir) -+ if not compress_type: -+ compress_type = self.conf.compress_type - if compress: -- if compress_type == 'gzip': -- sfile = '%s.gz' % sfile -- outfn = os.path.join(outdir, sfile) -- output = GzipFile(filename = outfn, mode='wb') -- elif compress_type == 'bzip2': -- sfile = '%s.bz2' % sfile -- outfn = os.path.join(outdir, sfile) -- output = BZ2File(filename 
= outfn, mode='wb') -+ sfile = '%s.%s' % (sfile, compress_type) -+ outfn = os.path.join(outdir, sfile) -+ output = compressOpen(outfn, mode='wb', compress_type=compress_type) -+ - else: - outfn = os.path.join(outdir, sfile) - output = open(outfn, 'w') -@@ -874,14 +893,13 @@ class MetaDataGenerator: - - thisdata = RepoData() - thisdata.type = mdtype -- baseloc = None - thisdata.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, sfile)) - thisdata.checksum = (self.conf.sumtype, csum) - if compress: - thisdata.openchecksum = (self.conf.sumtype, open_csum) - - thisdata.size = str(os.stat(outfn).st_size) -- thisdata.timestamp = str(os.stat(outfn).st_mtime) -+ thisdata.timestamp = str(int(os.stat(outfn).st_mtime)) - for (k, v) in attribs.items(): - setattr(thisdata, k, str(v)) - -@@ -925,9 +943,14 @@ class MetaDataGenerator: - rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None) - - for (rpm_file, ftype) in workfiles: -+ # when we fix y-m-p and non-gzipped xml files - then we can make this just add -+ # self.conf.compress_type -+ if ftype in ('other', 'filelists', 'primary'): -+ rpm_file = rpm_file + '.' + 'gz' -+ elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression: -+ rpm_file = rpm_file + '.' + self.conf.compress_type - complete_path = os.path.join(repopath, rpm_file) -- -- zfo = _gzipOpen(complete_path) -+ zfo = compressOpen(complete_path) - # This is misc.checksum() done locally so we can get the size too. - data = misc.Checksums([sumtype]) - while data.read(zfo, 2**16): -@@ -966,14 +989,20 @@ class MetaDataGenerator: - good_name = '%s.sqlite' % ftype - resultpath = os.path.join(repopath, good_name) - -+ # compat compression for rhel5 compatibility from fedora :( -+ compress_type = self.conf.compress_type -+ if self.compat_compress: -+ compress_type = 'bz2' -+ - # rename from silly name to not silly name - os.rename(tmp_result_path, resultpath) -- compressed_name = '%s.bz2' % good_name -+ compressed_name = '%s.%s' % (good_name, compress_type) - result_compressed = os.path.join(repopath, compressed_name) - db_csums[ftype] = misc.checksum(sumtype, resultpath) - - # compress the files -- bzipFile(resultpath, result_compressed) -+ -+ compressFile(resultpath, result_compressed, compress_type) - # csum the compressed file - db_compressed_sums[ftype] = misc.checksum(sumtype, - result_compressed) -@@ -983,8 +1012,8 @@ class MetaDataGenerator: - os.unlink(resultpath) - - if self.conf.unique_md_filenames: -- csum_compressed_name = '%s-%s.bz2' % ( -- db_compressed_sums[ftype], good_name) -+ csum_compressed_name = '%s-%s.%s' % ( -+ db_compressed_sums[ftype], good_name, compress_type) - csum_result_compressed = os.path.join(repopath, - csum_compressed_name) - os.rename(result_compressed, csum_result_compressed) -@@ -1001,7 +1030,7 @@ class MetaDataGenerator: - data.location = (self.conf.baseurl, - os.path.join(self.conf.finaldir, compressed_name)) - data.checksum = (sumtype, db_compressed_sums[ftype]) -- data.timestamp = str(db_stat.st_mtime) -+ data.timestamp = str(int(db_stat.st_mtime)) - data.size = str(db_stat.st_size) - data.opensize = str(un_stat.st_size) - data.openchecksum = (sumtype, db_csums[ftype]) -@@ -1020,7 +1049,13 @@ class MetaDataGenerator: - data.openchecksum = (sumtype, uncsum) - - if self.conf.unique_md_filenames: -- res_file = '%s-%s.xml.gz' % (csum, ftype) -+ if ftype in ('primary', 'filelists', 'other'): -+ compress = 'gz' -+ else: -+ compress = self.conf.compress_type -+ -+ main_name = 
'.'.join(rpm_file.split('.')[:-1]) -+ res_file = '%s-%s.%s' % (csum, main_name, compress) - orig_file = os.path.join(repopath, rpm_file) - dest_file = os.path.join(repopath, res_file) - os.rename(orig_file, dest_file) -@@ -1046,7 +1081,7 @@ class MetaDataGenerator: - - - if self.conf.additional_metadata: -- for md_type, mdfile in self.conf.additional_metadata.items(): -+ for md_type, md_file in self.conf.additional_metadata.items(): - mdcontent = self._createRepoDataObject(md_file, md_type) - repomd.repoData[mdcontent.type] = mdcontent - -@@ -1110,23 +1145,43 @@ class MetaDataGenerator: - raise MDError, _( - 'Could not remove old metadata file: %s: %s') % (oldfile, e) - -- # Move everything else back from olddir (eg. repoview files) -- try: -- old_contents = os.listdir(output_old_dir) -- except (OSError, IOError), e: -- old_contents = [] -- -+ old_to_remove = [] -+ old_pr = [] -+ old_fl = [] -+ old_ot = [] -+ old_pr_db = [] -+ old_fl_db = [] -+ old_ot_db = [] - for f in os.listdir(output_old_dir): - oldfile = os.path.join(output_old_dir, f) - finalfile = os.path.join(output_final_dir, f) -- if f.find('-') != -1 and f.split('-')[1] in ('primary.sqlite.bz2', -- 'filelists.sqlite.bz2', 'primary.xml.gz','other.sqlite.bz2', -- 'other.xml.gz','filelists.xml.gz'): -- os.remove(oldfile) # kill off the old ones -- continue -- if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2', -- 'primary.sqlite.bz2'): -- os.remove(oldfile) -+ -+ for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr), -+ ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl), -+ ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)): -+ fn = '.'.join(f.split('.')[:-1]) -+ if fn.endswith(end): -+ lst.append(oldfile) -+ break -+ -+ # make a list of the old metadata files we don't want to remove. 
-+ for lst in (old_pr, old_fl, old_ot, old_pr_db, old_fl_db, old_ot_db): -+ sortlst = sorted(lst, key=lambda x: os.path.getmtime(x), -+ reverse=True) -+ for thisf in sortlst[self.conf.retain_old_md:]: -+ old_to_remove.append(thisf) -+ -+ for f in os.listdir(output_old_dir): -+ oldfile = os.path.join(output_old_dir, f) -+ finalfile = os.path.join(output_final_dir, f) -+ fn = '.'.join(f.split('.')[:-1]) -+ if fn in ('filelists.sqlite', 'other.sqlite', -+ 'primary.sqlite') or oldfile in old_to_remove: -+ try: -+ os.remove(oldfile) -+ except (OSError, IOError), e: -+ raise MDError, _( -+ 'Could not remove old metadata file: %s: %s') % (oldfile, e) - continue - - if os.path.exists(finalfile): -@@ -1147,14 +1202,19 @@ class MetaDataGenerator: - msg += _('Error was %s') % e - raise MDError, msg - -- try: -- os.rmdir(output_old_dir) -- except OSError, e: -- self.errorlog(_('Could not remove old metadata dir: %s') -- % self.conf.olddir) -- self.errorlog(_('Error was %s') % e) -- self.errorlog(_('Please clean up this directory manually.')) -+ self._cleanup_tmp_repodata_dir() -+ self._cleanup_update_tmp_dir() -+ self._write_out_read_pkgs_list() -+ - -+ def _cleanup_update_tmp_dir(self): -+ if not self.conf.update: -+ return -+ -+ shutil.rmtree(self.oldData._repo.basecachedir, ignore_errors=True) -+ shutil.rmtree(self.oldData._repo.base_persistdir, ignore_errors=True) -+ -+ def _write_out_read_pkgs_list(self): - # write out the read_pkgs_list file with self.read_pkgs - if self.conf.read_pkgs_list: - try: -@@ -1167,6 +1227,23 @@ class MetaDataGenerator: - % self.conf.read_pkgs_list) - self.errorlog(_('Error was %s') % e) - -+ def _cleanup_tmp_repodata_dir(self): -+ output_old_dir = os.path.join(self.conf.outputdir, self.conf.olddir) -+ output_temp_dir = os.path.join(self.conf.outputdir, self.conf.tempdir) -+ for dirbase in (self.conf.olddir, self.conf.tempdir): -+ dirpath = os.path.join(self.conf.outputdir, dirbase) -+ if os.path.exists(dirpath): -+ try: -+ os.rmdir(dirpath) -+ except OSError, e: -+ self.errorlog(_('Could not remove temp metadata dir: %s') -+ % dirbase) -+ self.errorlog(_('Error was %s') % e) -+ self.errorlog(_('Please clean up this directory manually.')) -+ # our worker tmp path -+ if hasattr(self, '_worker_tmp_path') and os.path.exists(self._worker_tmp_path): -+ shutil.rmtree(self._worker_tmp_path, ignore_errors=True) -+ - def setup_sqlite_dbs(self, initdb=True): - """sets up the sqlite dbs w/table schemas and db_infos""" - destdir = os.path.join(self.conf.outputdir, self.conf.tempdir) -@@ -1194,24 +1271,6 @@ class SplitMetaDataGenerator(MetaDataGenerator): - (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url) - return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment))) - -- def getFileList(self, directory, ext): -- -- extlen = len(ext) -- -- def extension_visitor(arg, dirname, names): -- for fn in names: -- if os.path.isdir(fn): -- continue -- elif fn[-extlen:].lower() == '%s' % (ext): -- reldir = os.path.basename(dirname) -- if reldir == os.path.basename(directory): -- reldir = "" -- arg.append(os.path.join(reldir, fn)) -- -- rpmlist = [] -- os.path.walk(directory, extension_visitor, rpmlist) -- return rpmlist -- - def doPkgMetadata(self): - """all the heavy lifting for the package metadata""" - if len(self.conf.directories) == 1: -@@ -1232,6 +1291,19 @@ class SplitMetaDataGenerator(MetaDataGenerator): - thisdir = os.path.join(self.conf.basedir, mydir) - - filematrix[mydir] = self.getFileList(thisdir, '.rpm') -+ -+ # pkglist is a bit different for split 
media, as we have to know -+ # which dir. it belongs to. So we walk the dir. and then filter. -+ # We could be faster by not walking the dir. ... but meh. -+ if self.conf.pkglist: -+ pkglist = set(self.conf.pkglist) -+ pkgs = [] -+ for fname in filematrix[mydir]: -+ if fname not in pkglist: -+ continue -+ pkgs.append(fname) -+ filematrix[mydir] = pkgs -+ - self.trimRpms(filematrix[mydir]) - self.pkgcount += len(filematrix[mydir]) - -@@ -1240,7 +1312,6 @@ class SplitMetaDataGenerator(MetaDataGenerator): - self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano) - try: - self.openMetadataDocs() -- original_basedir = self.conf.basedir - for mydir in self.conf.directories: - self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano) - self.writeMetadataDocs(filematrix[mydir], mydir) -diff --git a/createrepo/merge.py b/createrepo/merge.py -index b3b2ea1..1ac43bb 100644 ---- a/createrepo/merge.py -+++ b/createrepo/merge.py -@@ -24,6 +24,7 @@ from yum.misc import unique, getCacheDir - import yum.update_md - import rpmUtils.arch - import operator -+from utils import MDError - import createrepo - import tempfile - -@@ -84,6 +85,8 @@ class RepoMergeBase: - # in the repolist - count = 0 - for r in self.repolist: -+ if r[0] == '/': -+ r = 'file://' + r # just fix the file repos, this is silly. - count +=1 - rid = 'repo%s' % count - n = self.yumbase.add_enable_repo(rid, baseurls=[r], -@@ -92,7 +95,10 @@ class RepoMergeBase: - n._merge_rank = count - - #setup our sacks -- self.yumbase._getSacks(archlist=self.archlist) -+ try: -+ self.yumbase._getSacks(archlist=self.archlist) -+ except yum.Errors.RepoError, e: -+ raise MDError, "Could not setup merge repo pkgsack: %s" % e - - myrepos = self.yumbase.repos.listEnabled() - -@@ -102,11 +108,16 @@ class RepoMergeBase: - def write_metadata(self, outputdir=None): - mytempdir = tempfile.mkdtemp() - if self.groups: -- comps_fn = mytempdir + '/groups.xml' -- compsfile = open(comps_fn, 'w') -- compsfile.write(self.yumbase.comps.xml()) -- compsfile.close() -- self.mdconf.groupfile=comps_fn -+ try: -+ comps_fn = mytempdir + '/groups.xml' -+ compsfile = open(comps_fn, 'w') -+ compsfile.write(self.yumbase.comps.xml()) -+ compsfile.close() -+ except yum.Errors.GroupsError, e: -+ # groups not being available shouldn't be a fatal error -+ pass -+ else: -+ self.mdconf.groupfile=comps_fn - - if self.updateinfo: - ui_fn = mytempdir + '/updateinfo.xml' -diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py -index 27d3690..54863cb 100644 ---- a/createrepo/readMetadata.py -+++ b/createrepo/readMetadata.py -@@ -16,11 +16,25 @@ - # Copyright 2006 Red Hat - - import os --import libxml2 - import stat - from utils import errorprint, _ - --from yum import repoMDObject -+import yum -+from yum import misc -+from yum.Errors import YumBaseError -+import tempfile -+class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite): -+ # special for special people like us. 
-+ def _return_remote_location(self): -+ -+ if self.basepath: -+ msg = """\n""" % ( -+ misc.to_xml(self.basepath, attrib=True), -+ misc.to_xml(self.relativepath, attrib=True)) -+ else: -+ msg = """\n""" % misc.to_xml(self.relativepath, attrib=True) -+ -+ return msg - - - class MetadataIndex(object): -@@ -30,178 +44,72 @@ class MetadataIndex(object): - opts = {} - self.opts = opts - self.outputdir = outputdir -+ realpath = os.path.realpath(outputdir) - repodatadir = self.outputdir + '/repodata' -- myrepomdxml = repodatadir + '/repomd.xml' -- if os.path.exists(myrepomdxml): -- repomd = repoMDObject.RepoMD('garbageid', myrepomdxml) -- b = repomd.getData('primary').location[1] -- f = repomd.getData('filelists').location[1] -- o = repomd.getData('other').location[1] -- basefile = os.path.join(self.outputdir, b) -- filelistfile = os.path.join(self.outputdir, f) -- otherfile = os.path.join(self.outputdir, o) -- else: -- basefile = filelistfile = otherfile = "" -- -- self.files = {'base' : basefile, -- 'filelist' : filelistfile, -- 'other' : otherfile} -- self.scan() -+ self._repo = yum.yumRepo.YumRepository('garbageid') -+ self._repo.baseurl = 'file://' + realpath -+ self._repo.basecachedir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo") -+ self._repo.base_persistdir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo-p") -+ self._repo.metadata_expire = 1 -+ self._repo.gpgcheck = 0 -+ self._repo.repo_gpgcheck = 0 -+ self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld) -+ self.pkg_tups_by_path = {} -+ try: -+ self.scan() -+ except YumBaseError, e: -+ print "Could not find valid repo at: %s" % self.outputdir -+ - - def scan(self): -- """Read in and index old repo data""" -- self.basenodes = {} -- self.filesnodes = {} -- self.othernodes = {} -- self.pkg_ids = {} -+ """Read in old repodata""" - if self.opts.get('verbose'): - print _("Scanning old repo data") -- for fn in self.files.values(): -- if not os.path.exists(fn): -- #cannot scan -- errorprint(_("Warning: Old repodata file missing: %s") % fn) -- return -- root = libxml2.parseFile(self.files['base']).getRootElement() -- self._scanPackageNodes(root, self._handleBase) -- if self.opts.get('verbose'): -- print _("Indexed %i base nodes" % len(self.basenodes)) -- root = libxml2.parseFile(self.files['filelist']).getRootElement() -- self._scanPackageNodes(root, self._handleFiles) -- if self.opts.get('verbose'): -- print _("Indexed %i filelist nodes" % len(self.filesnodes)) -- root = libxml2.parseFile(self.files['other']).getRootElement() -- self._scanPackageNodes(root, self._handleOther) -- if self.opts.get('verbose'): -- print _("Indexed %i other nodes" % len(self.othernodes)) -- #reverse index pkg ids to track references -- self.pkgrefs = {} -- for relpath, pkgid in self.pkg_ids.iteritems(): -- self.pkgrefs.setdefault(pkgid,[]).append(relpath) -- -- def _scanPackageNodes(self, root, handler): -- node = root.children -- while node is not None: -- if node.type != "element": -- node = node.next -+ self._repo.sack.populate(self._repo, 'all', None, False) -+ for thispo in self._repo.sack: -+ mtime = thispo.filetime -+ size = thispo.size -+ relpath = thispo.relativepath -+ do_stat = self.opts.get('do_stat', True) -+ if mtime is None: -+ print _("mtime missing for %s") % relpath - continue -- if node.name == "package": -- handler(node) -- node = node.next -- -- def _handleBase(self, node): -- top = node -- node = node.children -- pkgid = None -- mtime = None -- size = None -- relpath = None -- do_stat = self.opts.get('do_stat', 
True) -- while node is not None: -- if node.type != "element": -- node = node.next -+ if size is None: -+ print _("size missing for %s") % relpath - continue -- if node.name == "checksum": -- pkgid = node.content -- elif node.name == "time": -- mtime = int(node.prop('file')) -- elif node.name == "size": -- size = int(node.prop('package')) -- elif node.name == "location": -- relpath = node.prop('href') -- node = node.next -- if relpath is None: -- print _("Incomplete data for node") -- return -- if pkgid is None: -- print _("pkgid missing for %s") % relpath -- return -- if mtime is None: -- print _("mtime missing for %s") % relpath -- return -- if size is None: -- print _("size missing for %s") % relpath -- return -- if do_stat: -- filepath = os.path.join(self.opts['pkgdir'], relpath) -- try: -- st = os.stat(filepath) -- except OSError: -- #file missing -- ignore -- return -- if not stat.S_ISREG(st.st_mode): -- #ignore non files -- return -- #check size and mtime -- if st.st_size != size: -- if self.opts.get('verbose'): -- print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath) -- return -- if int(st.st_mtime) != mtime: -- if self.opts.get('verbose'): -- print _("Modification time changed for %s") % filepath -- return -- #otherwise we index -- self.basenodes[relpath] = top -- self.pkg_ids[relpath] = pkgid -- -- def _handleFiles(self, node): -- pkgid = node.prop('pkgid') -- if pkgid: -- self.filesnodes[pkgid] = node -- -- def _handleOther(self, node): -- pkgid = node.prop('pkgid') -- if pkgid: -- self.othernodes[pkgid] = node -+ if do_stat: -+ filepath = os.path.join(self.opts['pkgdir'], relpath) -+ try: -+ st = os.stat(filepath) -+ except OSError: -+ #file missing -- ignore -+ continue -+ if not stat.S_ISREG(st.st_mode): -+ #ignore non files -+ continue -+ #check size and mtime -+ if st.st_size != size: -+ if self.opts.get('verbose'): -+ print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath) -+ continue -+ if int(st.st_mtime) != mtime: -+ if self.opts.get('verbose'): -+ print _("Modification time changed for %s") % filepath -+ continue -+ -+ self.pkg_tups_by_path[relpath] = thispo.pkgtup -+ - -- def getNodes(self, relpath): -- """Return base, filelist, and other nodes for file, if they exist - -- Returns a tuple of nodes, or None if not found -+ def getNodes(self, relpath): -+ """return a package object based on relative path of pkg - """ -- bnode = self.basenodes.get(relpath,None) -- if bnode is None: -- return None -- pkgid = self.pkg_ids.get(relpath,None) -- if pkgid is None: -- print _("No pkgid found for: %s") % relpath -- return None -- fnode = self.filesnodes.get(pkgid,None) -- if fnode is None: -- return None -- onode = self.othernodes.get(pkgid,None) -- if onode is None: -- return None -- return bnode, fnode, onode -- -- def freeNodes(self,relpath): -- #causing problems -- """Free up nodes corresponding to file, if possible""" -- bnode = self.basenodes.get(relpath,None) -- if bnode is None: -- print "Missing node for %s" % relpath -- return -- bnode.unlinkNode() -- bnode.freeNode() -- del self.basenodes[relpath] -- pkgid = self.pkg_ids.get(relpath,None) -- if pkgid is None: -- print _("No pkgid found for: %s") % relpath -- return None -- del self.pkg_ids[relpath] -- dups = self.pkgrefs.get(pkgid) -- dups.remove(relpath) -- if len(dups): -- #still referenced -- return -- del self.pkgrefs[pkgid] -- for nodes in self.filesnodes, self.othernodes: -- node = nodes.get(pkgid) -- if node is not None: -- node.unlinkNode() -- node.freeNode() -- del 
nodes[pkgid] -+ if relpath in self.pkg_tups_by_path: -+ pkgtup = self.pkg_tups_by_path[relpath] -+ return self._repo.sack.searchPkgTuple(pkgtup)[0] -+ return None - -+ - - if __name__ == "__main__": - cwd = os.getcwd() -@@ -209,9 +117,9 @@ if __name__ == "__main__": - 'pkgdir': cwd} - - idx = MetadataIndex(cwd, opts) -- for fn in idx.basenodes.keys(): -- a,b,c, = idx.getNodes(fn) -- a.serialize() -- b.serialize() -- c.serialize() -- idx.freeNodes(fn) -+ for fn in idx.pkg_tups_by_path: -+ po = idx.getNodes(fn) -+ print po.xml_dump_primary_metadata() -+ print po.xml_dump_filelists_metadata() -+ print po.xml_dump_other_metadata() -+ -diff --git a/createrepo/utils.py b/createrepo/utils.py -index 995c3b9..b0d92ec 100644 ---- a/createrepo/utils.py -+++ b/createrepo/utils.py -@@ -23,6 +23,12 @@ import bz2 - import gzip - from gzip import write32u, FNAME - from yum import misc -+_available_compression = ['gz', 'bz2'] -+try: -+ import lzma -+ _available_compression.append('xz') -+except ImportError: -+ lzma = None - - def errorprint(stuff): - print >> sys.stderr, stuff -@@ -34,22 +40,14 @@ def _(args): - - class GzipFile(gzip.GzipFile): - def _write_gzip_header(self): -+ # Generate a header that is easily reproduced with gzip -9 -n on -+ # an unix-like system - self.fileobj.write('\037\213') # magic header - self.fileobj.write('\010') # compression method -- if hasattr(self, 'name'): -- fname = self.name[:-3] -- else: -- fname = self.filename[:-3] -- flags = 0 -- if fname: -- flags = FNAME -- self.fileobj.write(chr(flags)) -- write32u(self.fileobj, long(0)) -- self.fileobj.write('\002') -- self.fileobj.write('\377') -- if fname: -- self.fileobj.write(fname + '\000') -- -+ self.fileobj.write('\000') # flags -+ write32u(self.fileobj, long(0)) # timestamp -+ self.fileobj.write('\002') # max compression -+ self.fileobj.write('\003') # UNIX - - def _gzipOpen(filename, mode="rb", compresslevel=9): - return GzipFile(filename, mode, compresslevel) -@@ -69,6 +67,75 @@ def bzipFile(source, dest): - s_fn.close() - - -+def xzFile(source, dest): -+ if not 'xz' in _available_compression: -+ raise MDError, "Cannot use xz for compression, library/module is not available" -+ -+ s_fn = open(source, 'rb') -+ destination = lzma.LZMAFile(dest, 'w') -+ -+ while True: -+ data = s_fn.read(1024000) -+ -+ if not data: break -+ destination.write(data) -+ -+ destination.close() -+ s_fn.close() -+ -+def gzFile(source, dest): -+ -+ s_fn = open(source, 'rb') -+ destination = GzipFile(dest, 'w') -+ -+ while True: -+ data = s_fn.read(1024000) -+ -+ if not data: break -+ destination.write(data) -+ -+ destination.close() -+ s_fn.close() -+ -+ -+class Duck: -+ def __init__(self, **attr): -+ self.__dict__ = attr -+ -+ -+def compressFile(source, dest, compress_type): -+ """Compress an existing file using any compression type from source to dest""" -+ -+ if compress_type == 'xz': -+ xzFile(source, dest) -+ elif compress_type == 'bz2': -+ bzipFile(source, dest) -+ elif compress_type == 'gz': -+ gzFile(source, dest) -+ else: -+ raise MDError, "Unknown compression type %s" % compress_type -+ -+def compressOpen(fn, mode='rb', compress_type=None): -+ -+ if not compress_type: -+ # we are readonly and we don't give a compress_type - then guess based on the file extension -+ compress_type = fn.split('.')[-1] -+ if compress_type not in _available_compression: -+ compress_type = 'gz' -+ -+ if compress_type == 'xz': -+ fh = lzma.LZMAFile(fn, mode) -+ if mode == 'w': -+ fh = Duck(write=lambda s, write=fh.write: s != '' and write(s), -+ 
close=fh.close) -+ return fh -+ elif compress_type == 'bz2': -+ return bz2.BZ2File(fn, mode) -+ elif compress_type == 'gz': -+ return _gzipOpen(fn, mode) -+ else: -+ raise MDError, "Unknown compression type %s" % compress_type -+ - def returnFD(filename): - try: - fdno = os.open(filename, os.O_RDONLY) -@@ -124,15 +191,28 @@ def encodefiletypelist(filetypelist): - return result - - def split_list_into_equal_chunks(seq, num_chunks): -- avg = len(seq) / float(num_chunks) -- out = [] -- last = 0.0 -- while last < len(seq): -- out.append(seq[int(last):int(last + avg)]) -- last += avg -- -+ """it's used on sorted input which is then merged in order""" -+ out = [[] for i in range(num_chunks)] -+ for i, item in enumerate(seq): -+ out[i % num_chunks].append(item) - return out - -+def num_cpus_online(unknown=1): -+ if not hasattr(os, "sysconf"): -+ return unknown -+ -+ if not os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"): -+ return unknown -+ -+ ncpus = os.sysconf("SC_NPROCESSORS_ONLN") -+ try: -+ if int(ncpus) > 0: -+ return ncpus -+ except: -+ pass -+ -+ return unknown -+ - - class MDError(Exception): - def __init__(self, value=None): -diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py -index ac06196..f87ac6d 100644 ---- a/createrepo/yumbased.py -+++ b/createrepo/yumbased.py -@@ -16,6 +16,11 @@ - - - import os -+def _get_umask(): -+ oumask = os.umask(0) -+ os.umask(oumask) -+ return oumask -+_b4rpm_oumask = _get_umask() - import rpm - import types - -@@ -86,6 +91,9 @@ class CreateRepoPackage(YumLocalPackage): - csumo = os.fdopen(csumo, 'w', -1) - csumo.write(checksum) - csumo.close() -+ # tempfile forces 002 ... we want to undo that, so that users -+ # can share the cache. BZ 833350. -+ os.chmod(tmpfilename, 0666 ^ _b4rpm_oumask) - os.rename(tmpfilename, csumfile) - except: - pass -diff --git a/docs/createrepo.8 b/docs/createrepo.8 -index e3c4c3b..ff359de 100644 ---- a/docs/createrepo.8 -+++ b/docs/createrepo.8 -@@ -53,7 +53,8 @@ gullible). - Don't generate repo metadata, if their timestamps are newer than its rpms. - This option decreases the processing time drastically again, if you happen - to run it on an unmodified repo, but it is (currently) mutual exclusive --with the --split option. -+with the --split option. NOTE: This command will not notice when -+packages have been removed from repo. Use --update to handle that. - .br - .IP "\fB\--split\fP" - Run in split media mode. Rather than pass a single directory, take a set of -@@ -104,7 +105,16 @@ Tells createrepo to generate deltarpms and the delta metadata - paths to look for older pkgs to delta against. Can be specified multiple times - .IP "\fB\--num-deltas\fP int" - the number of older versions to make deltas against. Defaults to 1 -- -+.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST -+output the paths to the pkgs actually read useful with --update -+.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE -+max size of an rpm that to run deltarpm against (in bytes) -+.IP "\fB\--workers\fP WORKERS -+number of workers to spawn to read rpms -+.IP "\fB\--compress-type\fP -+specify which compression method to use: compat (default), -+xz (may not be available), gz, bz2. -+.IP - - .SH "EXAMPLES" - Here is an example of a repository with a groups file. Note that the -diff --git a/genpkgmetadata.py b/genpkgmetadata.py -index 8c98191..c46e441 100755 ---- a/genpkgmetadata.py -+++ b/genpkgmetadata.py -@@ -37,6 +37,12 @@ def parse_args(args, conf): - Sanity check all the things being passed in. 
- """ - -+ def_workers = os.nice(0) -+ if def_workers > 0: -+ def_workers = 1 # We are niced, so just use a single worker. -+ else: -+ def_workers = 0 # zoooom.... -+ - _def = yum.misc._default_checksums[0] - _avail = yum.misc._available_checksums - parser = OptionParser(version = "createrepo %s" % createrepo.__version__) -@@ -100,6 +106,8 @@ def parse_args(args, conf): - parser.add_option("--simple-md-filenames", dest="simple_md_filenames", - help="do not include the file's checksum in the filename, helps with proxies", - default=False, action="store_true") -+ parser.add_option("--retain-old-md", default=0, type='int', dest='retain_old_md', -+ help="keep around the latest (by timestamp) N copies of the old repodata") - parser.add_option("--distro", default=[], action="append", - help="distro tag and optional cpeid: --distro" "'cpeid,textname'") - parser.add_option("--content", default=[], dest='content_tags', -@@ -119,10 +127,15 @@ def parse_args(args, conf): - parser.add_option("--max-delta-rpm-size", default=100000000, - dest='max_delta_rpm_size', type='int', - help="max size of an rpm that to run deltarpm against (in bytes)") -- -- parser.add_option("--workers", default=1, -+ parser.add_option("--workers", default=def_workers, - dest='workers', type='int', - help="number of workers to spawn to read rpms") -+ parser.add_option("--xz", default=False, -+ action="store_true", -+ help="use xz for repodata compression") -+ parser.add_option("--compress-type", default='compat', dest="compress_type", -+ help="which compression type to use") -+ - - (opts, argsleft) = parser.parse_args(args) - if len(argsleft) > 1 and not opts.split: -@@ -138,6 +151,9 @@ def parse_args(args, conf): - else: - directories = argsleft - -+ if opts.workers >= 128: -+ errorprint(_('Warning: More than 128 workers is a lot. 
Limiting.')) -+ opts.workers = 128 - if opts.sumtype == 'sha1': - errorprint(_('Warning: It is more compatible to use sha instead of sha1')) - -@@ -155,6 +171,11 @@ def parse_args(args, conf): - - if opts.nodatabase: - opts.database = False -+ -+ # xz is just a shorthand for compress_type -+ if opts.xz and opts.compress_type == 'compat': -+ opts.compress_type='xz' -+ - - # let's switch over to using the conf object - put all the opts into it - for opt in parser.option_list: -@@ -240,6 +261,7 @@ def main(args): - if mdgen.checkTimeStamps(): - if mdgen.conf.verbose: - print _('repo is up to date') -+ mdgen._cleanup_tmp_repodata_dir() - sys.exit(0) - - if conf.profile: -diff --git a/mergerepo.py b/mergerepo.py -index 05e5f5e..80cb1a8 100755 ---- a/mergerepo.py -+++ b/mergerepo.py -@@ -18,6 +18,7 @@ - - import sys - import createrepo.merge -+from createrepo.utils import MDError - from optparse import OptionParser - - #TODO: -@@ -47,6 +48,9 @@ def parse_args(args): - help="Do not merge group(comps) metadata") - parser.add_option("", "--noupdateinfo", default=False, action="store_true", - help="Do not merge updateinfo metadata") -+ parser.add_option("--compress-type", default=None, dest="compress_type", -+ help="which compression type to use") -+ - (opts, argsleft) = parser.parse_args(args) - - if len(opts.repos) < 2: -@@ -77,9 +81,14 @@ def main(args): - rmbase.groups = False - if opts.noupdateinfo: - rmbase.updateinfo = False -- -- rmbase.merge_repos() -- rmbase.write_metadata() -- -+ if opts.compress_type: -+ rmbase.mdconf.compress_type = opts.compress_type -+ try: -+ rmbase.merge_repos() -+ rmbase.write_metadata() -+ except MDError, e: -+ print >> sys.stderr, "Could not merge repos: %s" % e -+ sys.exit(1) -+ - if __name__ == "__main__": - main(sys.argv[1:]) -diff --git a/modifyrepo.py b/modifyrepo.py -index 17094a4..acdde77 100755 ---- a/modifyrepo.py -+++ b/modifyrepo.py -@@ -1,11 +1,15 @@ - #!/usr/bin/python --# This tools is used to insert arbitrary metadata into an RPM repository. -+# This tool is used to manipulate arbitrary metadata in a RPM repository. - # Example: - # ./modifyrepo.py updateinfo.xml myrepo/repodata -+# or -+# ./modifyrepo.py --remove updateinfo.xml myrepo/repodata - # or in Python: - # >>> from modifyrepo import RepoMetadata - # >>> repomd = RepoMetadata('myrepo/repodata') - # >>> repomd.add('updateinfo.xml') -+# or -+# >>> repomd.remove('updateinfo.xml') - # - # This program is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by -@@ -20,11 +24,13 @@ - # (C) Copyright 2006 Red Hat, Inc. 
- # Luke Macken - # modified by Seth Vidal 2008 -+# modified by Daniel Mach 2011 - - import os - import sys - from createrepo import __version__ --from createrepo.utils import checksum_and_rename, GzipFile, MDError -+from createrepo.utils import checksum_and_rename, compressOpen, MDError -+from createrepo.utils import _available_compression - from yum.misc import checksum - - from yum.repoMDObject import RepoMD, RepoMDError, RepoData -@@ -39,6 +45,8 @@ class RepoMetadata: - self.repodir = os.path.abspath(repo) - self.repomdxml = os.path.join(self.repodir, 'repomd.xml') - self.checksum_type = 'sha256' -+ self.compress = False -+ self.compress_type = _available_compression[-1] # best available - - if not os.path.exists(self.repomdxml): - raise MDError, '%s not found' % self.repomdxml -@@ -49,6 +57,35 @@ class RepoMetadata: - except RepoMDError, e: - raise MDError, 'Could not parse %s' % self.repomdxml - -+ def _get_mdtype(self, mdname, mdtype=None): -+ """ Get mdtype from existing mdtype or from a mdname. """ -+ if mdtype: -+ return mdtype -+ return mdname.split('.')[0] -+ -+ def _print_repodata(self, repodata): -+ """ Print repodata details. """ -+ print " type =", repodata.type -+ print " location =", repodata.location[1] -+ print " checksum =", repodata.checksum[1] -+ print " timestamp =", repodata.timestamp -+ print " open-checksum =", repodata.openchecksum[1] -+ -+ def _write_repomd(self): -+ """ Write the updated repomd.xml. """ -+ outmd = file(self.repomdxml, 'w') -+ outmd.write(self.repoobj.dump_xml()) -+ outmd.close() -+ print "Wrote:", self.repomdxml -+ -+ def _remove_repodata_file(self, repodata): -+ """ Remove a file specified in repodata location """ -+ try: -+ os.remove(repodata.location[1]) -+ except OSError, ex: -+ if ex.errno != 2: -+ # continue on a missing file -+ raise MDError("could not remove file %s" % repodata.location[1]) - - def add(self, metadata, mdtype=None): - """ Insert arbitrary metadata into this repository. -@@ -63,8 +100,8 @@ class RepoMetadata: - mdname = 'updateinfo.xml' - elif isinstance(metadata, str): - if os.path.exists(metadata): -- if metadata.endswith('.gz'): -- oldmd = GzipFile(filename=metadata, mode='rb') -+ if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'): -+ oldmd = compressOpen(metadata, mode='rb') - else: - oldmd = file(metadata, 'r') - md = oldmd.read() -@@ -75,14 +112,19 @@ class RepoMetadata: - else: - raise MDError, 'invalid metadata type' - -+ do_compress = False - ## Compress the metadata and move it into the repodata -- if not mdname.endswith('.gz'): -- mdname += '.gz' -- if not mdtype: -- mdtype = mdname.split('.')[0] -- -+ if self.compress or not mdname.split('.')[-1] in ('gz', 'bz2', 'xz'): -+ do_compress = True -+ mdname += '.' 
+ self.compress_type -+ mdtype = self._get_mdtype(mdname, mdtype) -+ - destmd = os.path.join(self.repodir, mdname) -- newmd = GzipFile(filename=destmd, mode='wb') -+ if do_compress: -+ newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type) -+ else: -+ newmd = open(destmd, 'wb') -+ - newmd.write(md) - newmd.close() - print "Wrote:", destmd -@@ -91,11 +133,8 @@ class RepoMetadata: - csum, destmd = checksum_and_rename(destmd, self.checksum_type) - base_destmd = os.path.basename(destmd) - -- -- ## Remove any stale metadata -- if mdtype in self.repoobj.repoData: -- del self.repoobj.repoData[mdtype] -- -+ # Remove any stale metadata -+ old_rd = self.repoobj.repoData.pop(mdtype, None) - - new_rd = RepoData() - new_rd.type = mdtype -@@ -103,20 +142,30 @@ class RepoMetadata: - new_rd.checksum = (self.checksum_type, csum) - new_rd.openchecksum = (self.checksum_type, open_csum) - new_rd.size = str(os.stat(destmd).st_size) -- new_rd.timestamp = str(os.stat(destmd).st_mtime) -+ new_rd.timestamp = str(int(os.stat(destmd).st_mtime)) - self.repoobj.repoData[new_rd.type] = new_rd -- -- print " type =", new_rd.type -- print " location =", new_rd.location[1] -- print " checksum =", new_rd.checksum[1] -- print " timestamp =", new_rd.timestamp -- print " open-checksum =", new_rd.openchecksum[1] -- -- ## Write the updated repomd.xml -- outmd = file(self.repomdxml, 'w') -- outmd.write(self.repoobj.dump_xml()) -- outmd.close() -- print "Wrote:", self.repomdxml -+ self._print_repodata(new_rd) -+ self._write_repomd() -+ -+ if old_rd is not None and old_rd.location[1] != new_rd.location[1]: -+ # remove the old file when overwriting metadata -+ # with the same mdtype but different location -+ self._remove_repodata_file(old_rd) -+ -+ def remove(self, metadata, mdtype=None): -+ """ Remove metadata from this repository. 
""" -+ mdname = metadata -+ mdtype = self._get_mdtype(mdname, mdtype) -+ -+ old_rd = self.repoobj.repoData.pop(mdtype, None) -+ if old_rd is None: -+ print "Metadata not found: %s" % mdtype -+ return -+ -+ self._remove_repodata_file(old_rd) -+ print "Removed:" -+ self._print_repodata(old_rd) -+ self._write_repomd() - - - def main(args): -@@ -124,7 +173,13 @@ def main(args): - # query options - parser.add_option("--mdtype", dest='mdtype', - help="specific datatype of the metadata, will be derived from the filename if not specified") -- parser.usage = "modifyrepo [options] " -+ parser.add_option("--remove", action="store_true", -+ help="remove specified file from repodata") -+ parser.add_option("--compress", action="store_true", default=False, -+ help="compress the new repodata before adding it to the repo") -+ parser.add_option("--compress-type", dest='compress_type', default='gz', -+ help="compression format to use") -+ parser.usage = "modifyrepo [options] [--remove] " - - (opts, argsleft) = parser.parse_args(args) - if len(argsleft) != 2: -@@ -137,11 +192,28 @@ def main(args): - except MDError, e: - print "Could not access repository: %s" % str(e) - return 1 -+ -+ -+ repomd.compress = opts.compress -+ if opts.compress_type in _available_compression: -+ repomd.compress_type = opts.compress_type -+ -+ # remove -+ if opts.remove: -+ try: -+ repomd.remove(metadata) -+ except MDError, ex: -+ print "Could not remove metadata: %s" % (metadata, str(ex)) -+ return 1 -+ return -+ -+ # add - try: - repomd.add(metadata, mdtype=opts.mdtype) - except MDError, e: - print "Could not add metadata from file %s: %s" % (metadata, str(e)) - return 1 -+ - - if __name__ == '__main__': - ret = main(sys.argv[1:]) -diff --git a/worker.py b/worker.py -index eb35ef7..b67b5bd 100755 ---- a/worker.py -+++ b/worker.py -@@ -5,6 +5,7 @@ import yum - import createrepo - import os - import rpmUtils -+import re - from optparse import OptionParser - - -@@ -23,6 +24,8 @@ def main(args): - parser = OptionParser() - parser.add_option('--tmpmdpath', default=None, - help="path where the outputs should be dumped for this worker") -+ parser.add_option('--pkglist', default=None, -+ help="file to read the pkglist from in lieu of all of them on the cli") - parser.add_option("--pkgoptions", default=[], action='append', - help="pkgoptions in the format of key=value") - parser.add_option("--quiet", default=False, action='store_true', -@@ -36,10 +39,6 @@ def main(args): - opts, pkgs = parser.parse_args(args) - external_data = {'_packagenumber': 1} - globalopts = {} -- if not opts.tmpmdpath: -- print >> sys.stderr, "tmpmdpath required for destination files" -- sys.exit(1) -- - - for strs in opts.pkgoptions: - k,v = strs.split('=') -@@ -61,18 +60,39 @@ def main(args): - v = None - globalopts[k] = v - -+ # turn off buffering on stdout -+ sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) - - reldir = external_data['_reldir'] - ts = rpmUtils.transaction.initReadOnlyTransaction() -- pri = open(opts.tmpmdpath + '/primary.xml' , 'w') -- fl = open(opts.tmpmdpath + '/filelists.xml' , 'w') -- other = open(opts.tmpmdpath + '/other.xml' , 'w') -- -- -+ if opts.tmpmdpath: -+ files = [open(opts.tmpmdpath + '/%s.xml' % i, 'w') -+ for i in ('primary', 'filelists', 'other')] -+ def output(*xml): -+ for fh, buf in zip(files, xml): -+ fh.write(buf) -+ else: -+ def output(*xml): -+ buf = ' '.join(str(len(i)) for i in xml) -+ sys.stdout.write('*** %s\n' % buf) -+ for buf in xml: -+ sys.stdout.write(buf) -+ -+ if opts.pkglist: -+ for line in 
open(opts.pkglist,'r').readlines(): -+ line = line.strip() -+ if re.match('^\s*\#.*', line) or re.match('^\s*$', line): -+ continue -+ pkgs.append(line) -+ -+ clog_limit=globalopts.get('clog_limit', None) -+ if clog_limit is not None: -+ clog_limit = int(clog_limit) - for pkgfile in pkgs: - pkgpath = reldir + '/' + pkgfile - if not os.path.exists(pkgpath): - print >> sys.stderr, "File not found: %s" % pkgpath -+ output() - continue - - try: -@@ -80,20 +100,17 @@ def main(args): - print "reading %s" % (pkgfile) - - pkg = createrepo.yumbased.CreateRepoPackage(ts, package=pkgpath, -- external_data=external_data) -- pri.write(pkg.xml_dump_primary_metadata()) -- fl.write(pkg.xml_dump_filelists_metadata()) -- other.write(pkg.xml_dump_other_metadata(clog_limit= -- globalopts.get('clog_limit', None))) -+ sumtype=globalopts.get('sumtype', None), -+ external_data=external_data) -+ output(pkg.xml_dump_primary_metadata(), -+ pkg.xml_dump_filelists_metadata(), -+ pkg.xml_dump_other_metadata(clog_limit=clog_limit)) - except yum.Errors.YumBaseError, e: - print >> sys.stderr, "Error: %s" % e -+ output() - continue - else: - external_data['_packagenumber']+=1 - -- pri.close() -- fl.close() -- other.close() -- - if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/createrepo.spec b/createrepo.spec index 7b59aab..7019ce0 100644 --- a/createrepo.spec +++ b/createrepo.spec @@ -1,13 +1,12 @@ Summary: Creates a common metadata repository Summary(pl.UTF-8): Tworzenie wspólnego repozytorium metadanych Name: createrepo -Version: 0.9.9 -Release: 3.1 +Version: 0.10 +Release: 1 License: GPL v2 Group: Applications/System Source0: http://createrepo.baseurl.org/download/%{name}-%{version}.tar.gz -# Source0-md5: 10641f19a40e9f633b300e23dde00349 -Patch0: %{name}-head.patch +# Source0-md5: 55f83ca2f2d137490ea2182a354d2e68 Patch1: rpm5-caps.patch URL: http://createrepo.baseurl.org/ BuildRequires: bash-completion >= 2.0 @@ -48,7 +47,6 @@ bashowe uzupełnianie nazw dla createrepo. %prep %setup -q -%patch0 -p1 %patch1 -p1 %{__sed} -i -e '1s,#!.*python,#!%{__python},' modifyrepo.py -- 2.44.0
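For reference, a minimal sketch of the bash-completion directory detection that both the dropped Makefile hunk and the new %compdir macro in createrepo.spec rely on. This is an illustrative sketch only, assuming pkg-config is installed; the echo line and variable name are hypothetical, not taken from the patch:

    #!/bin/sh
    # Ask bash-completion >= 2.0 where completion scripts belong; fall back to
    # the legacy directory when the pkg-config metadata is not available.
    compdir=$(pkg-config --variable=completionsdir bash-completion 2>/dev/null)
    compdir=${compdir:-/etc/bash_completion.d}
    echo "completions go to: $compdir"

The spec performs the same detection at build time (hence the new BuildRequires: bash-completion), which is why the packaged completion file moves out of the hard-coded %{_sysconfdir}/bash_completion.d path.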