createrepo-head.patch: update code from git master, add bash-completion subpackage

diff --git a/createrepo.bash b/createrepo.bash
index 54ac8b2..f5a8bb7 100644
--- a/createrepo.bash
+++ b/createrepo.bash
@@ -1,11 +1,17 @@
 # bash completion for createrepo and friends

+_cr_compress_type()
+{
+    COMPREPLY=( $( compgen -W "$( ${1:-createrepo} --compress-type=FOO / 2>&1 \
+        | sed -ne 's/,/ /g' -ne 's/.*[Cc]ompression.*://p' )" -- "$2" ) )
+}
+
 _cr_createrepo()
 {
     COMPREPLY=()

     case $3 in
-        --version|-h|--help|-u|--baseurl|--distro|--content|--repo|--workers|\
+        --version|-h|--help|-u|--baseurl|--distro|--content|--repo|\
         --revision|-x|--excludes|--changelog-limit|--max-delta-rpm-size)
             return 0
             ;;
@@ -30,10 +36,24 @@ _cr_createrepo()
             COMPREPLY=( $( compgen -f -o plusdirs -X '!*.rpm' -- "$2" ) )
             return 0
             ;;
+        --retain-old-md)
+            COMPREPLY=( $( compgen -W '0 1 2 3 4 5 6 7 8 9' -- "$2" ) )
+            return 0
+            ;;
         --num-deltas)
             COMPREPLY=( $( compgen -W '1 2 3 4 5 6 7 8 9' -- "$2" ) )
             return 0
             ;;
+        --workers)
+            local min=2 max=$( getconf _NPROCESSORS_ONLN 2>/dev/null )
+            [[ -z $max || $max -lt $min ]] && max=$min
+            COMPREPLY=( $( compgen -W "{1..$max}" -- "$2" ) )
+            return 0
+            ;;
+        --compress-type)
+            _cr_compress_type "$1" "$2"
+            return 0
+            ;;
     esac

     if [[ $2 == -* ]] ; then
@@ -42,9 +62,9 @@ _cr_createrepo()
             --cachedir --checkts --no-database --update --update-md-path
             --skip-stat --split --pkglist --includepkg --outputdir
             --skip-symlinks --changelog-limit --unique-md-filenames
-            --simple-md-filenames --distro --content --repo --revision --deltas
-            --oldpackagedirs --num-deltas --read-pkgs-list
-            --max-delta-rpm-size --workers' -- "$2" ) )
+            --simple-md-filenames --retain-old-md --distro --content --repo
+            --revision --deltas --oldpackagedirs --num-deltas --read-pkgs-list
+            --max-delta-rpm-size --workers --compress-type' -- "$2" ) )
     else
         COMPREPLY=( $( compgen -d -- "$2" ) )
     fi
@@ -63,10 +83,14 @@ _cr_mergerepo()
             COMPREPLY=( $( compgen -d -- "$2" ) )
             return 0
             ;;
+        --compress-type)
+            _cr_compress_type "" "$2"
+            return 0
+            ;;
     esac

     COMPREPLY=( $( compgen -W '--version --help --repo --archlist --no-database
-        --outputdir --nogroups --noupdateinfo' -- "$2" ) )
+        --outputdir --nogroups --noupdateinfo --compress-type' -- "$2" ) )
 } &&
 complete -F _cr_mergerepo -o filenames mergerepo mergerepo.py

@@ -78,17 +102,22 @@ _cr_modifyrepo()
         --version|-h|--help|--mdtype)
             return 0
             ;;
+        --compress-type)
+            _cr_compress_type "" "$2"
+            return 0
+            ;;
     esac

     if [[ $2 == -* ]] ; then
-        COMPREPLY=( $( compgen -W '--version --help --mdtype' -- "$2" ) )
+        COMPREPLY=( $( compgen -W '--version --help --mdtype --remove
+            --compress --compress-type' -- "$2" ) )
         return 0
     fi

     local i argnum=1
     for (( i=1; i < ${#COMP_WORDS[@]}-1; i++ )) ; do
         if [[ ${COMP_WORDS[i]} != -* &&
-                    ${COMP_WORDS[i-1]} != @(=|--mdtype) ]]; then
+              ${COMP_WORDS[i-1]} != @(=|--@(md|compress-)type) ]]; then
             argnum=$(( argnum+1 ))
         fi
     done
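
The _cr_compress_type helper above discovers the supported compression types at completion time by passing a deliberately bogus --compress-type value and scraping the error message. A minimal Python sketch of the same probing trick (a hypothetical helper, not part of the patch; it assumes createrepo is on $PATH and that the error text lists the valid types after a colon, matching the "Compression %s not available: Please choose from: %s" message added in __init__.py below):

    # Emulate the bash completion's discovery trick in Python 2.
    import subprocess

    def probe_compress_types(prog='createrepo'):
        p = subprocess.Popen([prog, '--compress-type=FOO', '/'],
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = p.communicate()[0]
        for line in out.splitlines():
            # e.g. "Compression FOO not available: Please choose from: gz, bz2, xz"
            if 'ompression' in line and ':' in line:
                return [t.strip() for t in line.rsplit(':', 1)[1].split(',')]
        return []
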
diff --git a/createrepo.spec b/createrepo.spec
index 1e491cd..eea7092 100644
--- a/createrepo.spec
+++ b/createrepo.spec
@@ -11,7 +11,7 @@ URL: http://createrepo.baseurl.org/
 BuildRoot: %{_tmppath}/%{name}-%{version}root
 BuildArchitectures: noarch
 Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python
-Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm
+Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm, pyliblzma

 %description
 This utility will generate a common metadata repository from a directory of
@@ -43,6 +43,9 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install
 %{python_sitelib}/createrepo

 %changelog
+* Fri Sep  9 2011 Seth Vidal <skvidal at fedoraproject.org>
+- add lzma dep
+
 * Wed Jan 26 2011 Seth Vidal <skvidal at fedoraproject.org>
 - bump to 0.9.9
 - add worker.py
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 8f2538e..1b18a9f 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -26,15 +26,16 @@ import tempfile
 import stat
 import fcntl
 import subprocess
+from select import select

-from yum import misc, Errors, to_unicode
-from yum.repoMDObject import RepoMD, RepoMDError, RepoData
+from yum import misc, Errors
+from yum.repoMDObject import RepoMD, RepoData
 from yum.sqlutils import executeSQL
 from yum.packageSack import MetaSack
-from yum.packages import YumAvailablePackage, YumLocalPackage
+from yum.packages import YumAvailablePackage

 import rpmUtils.transaction
-from utils import _, errorprint, MDError
+from utils import _, errorprint, MDError, lzma, _available_compression
 import readMetadata
 try:
     import sqlite3 as sqlite
@@ -46,8 +47,9 @@ try:
 except ImportError:
     pass

-from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \
+from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \
                   checksum_and_rename, split_list_into_equal_chunks
+from utils import num_cpus_online
 import deltarpms

 __version__ = '0.9.9'
@@ -74,7 +76,7 @@ class MetaDataConfig(object):
         self.deltadir = None
         self.delta_relative = 'drpms/'
         self.oldpackage_paths = [] # where to look for the old packages -
-        self.deltafile = 'prestodelta.xml.gz'
+        self.deltafile = 'prestodelta.xml'
         self.num_deltas = 1 # number of older versions to delta (max)
         self.max_delta_rpm_size = 100000000
         self.update_md_path = None
@@ -86,9 +88,9 @@ class MetaDataConfig(object):
         self.skip_symlinks = False
         self.pkglist = []
         self.database_only = False
-        self.primaryfile = 'primary.xml.gz'
-        self.filelistsfile = 'filelists.xml.gz'
-        self.otherfile = 'other.xml.gz'
+        self.primaryfile = 'primary.xml'
+        self.filelistsfile = 'filelists.xml'
+        self.otherfile = 'other.xml'
         self.repomdfile = 'repomd.xml'
         self.tempdir = '.repodata'
         self.finaldir = 'repodata'
@@ -108,8 +110,10 @@ class MetaDataConfig(object):
         self.collapse_glibc_requires = True
         self.workers = 1 # number of workers to fork off to grab metadata from the pkgs
         self.worker_cmd = '/usr/share/createrepo/worker.py'
-        
         #self.worker_cmd = './worker.py' # helpful when testing
+        self.retain_old_md = 0
+        self.compress_type = 'compat'
+

 class SimpleMDCallBack(object):
     def errorlog(self, thing):
@@ -141,10 +145,23 @@ class MetaDataGenerator:
         self.files = []
         self.rpmlib_reqs = {}
         self.read_pkgs = []
+        self.compat_compress = False

         if not self.conf.directory and not self.conf.directories:
             raise MDError, "No directory given on which to run."
-
+        
+        if self.conf.compress_type == 'compat':
+            self.compat_compress = True
+            self.conf.compress_type = None
+            
+        if not self.conf.compress_type:
+            self.conf.compress_type = 'gz'
+        
+        if self.conf.compress_type not in utils._available_compression:
+            raise MDError, "Compression %s not available: Please choose from: %s" \
+                 % (self.conf.compress_type, ', '.join(utils._available_compression))
+            
+            
         if not self.conf.directories: # just makes things easier later
             self.conf.directories = [self.conf.directory]
         if not self.conf.directory: # ensure we have both in the config object
@@ -290,14 +307,13 @@ class MetaDataGenerator:

         def extension_visitor(filelist, dirname, names):
             for fn in names:
+                fn = os.path.join(dirname, fn)
                 if os.path.isdir(fn):
                     continue
                 if self.conf.skip_symlinks and os.path.islink(fn):
                     continue
                 elif fn[-extlen:].lower() == '%s' % (ext):
-                    relativepath = dirname.replace(startdir, "", 1)
-                    relativepath = relativepath.lstrip("/")
-                    filelist.append(os.path.join(relativepath, fn))
+                    filelist.append(fn[len(startdir):])

         filelist = []
         startdir = directory + '/'
@@ -311,7 +327,7 @@ class MetaDataGenerator:
     def checkTimeStamps(self):
         """check the timestamp of our target dir. If it is not newer than
            the repodata return False, else True"""
-        if self.conf.checkts:
+        if self.conf.checkts and self.conf.mdtimestamp:
             dn = os.path.join(self.conf.basedir, self.conf.directory)
             files = self.getFileList(dn, '.rpm')
             files = self.trimRpms(files)
@@ -410,9 +426,11 @@ class MetaDataGenerator:

     def _setupPrimary(self):
         # setup the primary metadata file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed
+        fpz = self.conf.primaryfile + '.' + 'gz'
         primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                       self.conf.primaryfile)
-        fo = _gzipOpen(primaryfilepath, 'w')
+                                       fpz)
+        fo = compressOpen(primaryfilepath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \
             ' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' %
@@ -421,9 +439,11 @@ class MetaDataGenerator:

     def _setupFilelists(self):
         # setup the filelist file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed        
+        fpz = self.conf.filelistsfile + '.' + 'gz'
         filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                    self.conf.filelistsfile)
-        fo = _gzipOpen(filelistpath, 'w')
+                                    fpz)
+        fo = compressOpen(filelistpath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \
                  ' packages="%s">' % self.pkgcount)
@@ -431,9 +451,11 @@ class MetaDataGenerator:

     def _setupOther(self):
         # setup the other file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed        
+        fpz = self.conf.otherfile + '.' + 'gz'
         otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.otherfile)
-        fo = _gzipOpen(otherfilepath, 'w')
+                                     fpz)
+        fo = compressOpen(otherfilepath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \
                  ' packages="%s">' %
@@ -442,9 +464,10 @@ class MetaDataGenerator:

     def _setupDelta(self):
         # setup the other file
+        fpz = self.conf.deltafile + '.' + self.conf.compress_type        
         deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.deltafile)
-        fo = _gzipOpen(deltafilepath, 'w')
+                                     fpz)
+        fo = compressOpen(deltafilepath, 'w', self.conf.compress_type)
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<prestodelta>\n')
         return fo
@@ -520,6 +543,7 @@ class MetaDataGenerator:
         # go on their merry way
         
         newpkgs = []
+        keptpkgs = []
         if self.conf.update:
             # if we're in --update mode then only act on the new/changed pkgs
             for pkg in pkglist:
@@ -530,39 +554,13 @@ class MetaDataGenerator:
                 old_pkg = pkg
                 if pkg.find("://") != -1:
                     old_pkg = os.path.basename(pkg)
-                nodes = self.oldData.getNodes(old_pkg)
-                if nodes is not None: # we have a match in the old metadata
+                old_po = self.oldData.getNodes(old_pkg)
+                if old_po: # we have a match in the old metadata
                     if self.conf.verbose:
                         self.callback.log(_("Using data from old metadata for %s")
                                             % pkg)
-                    (primarynode, filenode, othernode) = nodes
-
-                    for node, outfile in ((primarynode, self.primaryfile),
-                                          (filenode, self.flfile),
-                                          (othernode, self.otherfile)):
-                        if node is None:
-                            break
-
-                        if self.conf.baseurl:
-                            anode = node.children
-                            while anode is not None:
-                                if anode.type != "element":
-                                    anode = anode.next
-                                    continue
-                                if anode.name == "location":
-                                    anode.setProp('xml:base', self.conf.baseurl)
-                                anode = anode.next
-
-                        output = node.serialize('UTF-8', self.conf.pretty)
-                        if output:
-                            outfile.write(output)
-                        else:
-                            if self.conf.verbose:
-                                self.callback.log(_("empty serialize on write to" \
-                                                    "%s in %s") % (outfile, pkg))
-                        outfile.write('\n')
-
-                    self.oldData.freeNodes(pkg)
+                    keptpkgs.append((pkg, old_po))
+
                     #FIXME - if we're in update and we have deltas enabled
                     # check the presto data for this pkg and write its info back out
                     # to our deltafile
@@ -584,32 +582,45 @@ class MetaDataGenerator:
             po = None
             if isinstance(pkg, YumAvailablePackage):
                 po = pkg
-                self.read_pkgs.append(po.localpath)
+                self.read_pkgs.append(po.localPkg())

             # if we're dealing with remote pkgs - pitch it over to doing
             # them one at a time, for now. 
             elif pkg.find('://') != -1:
-                po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
+                po = self.read_in_package(pkg, pkgpath=pkgpath, reldir=reldir)
                 self.read_pkgs.append(pkg)
             
             if po:
-                self.primaryfile.write(po.xml_dump_primary_metadata())
-                self.flfile.write(po.xml_dump_filelists_metadata())
-                self.otherfile.write(po.xml_dump_other_metadata(
-                                     clog_limit=self.conf.changelog_limit))
+                keptpkgs.append((pkg, po))
                 continue
                 
             pkgfiles.append(pkg)
-            
-       
+
+        keptpkgs.sort(reverse=True)
+        # keptpkgs is a list of (filename, po); pkgfiles is a list of filenames.
+        # Need to write them in sorted(filename) order.  We loop over pkgfiles,
+        # inserting keptpkgs in right spots (using the upto argument).
+        def save_keptpkgs(upto):
+            while keptpkgs and (upto is None or keptpkgs[-1][0] < upto):
+                filename, po = keptpkgs.pop()
+                # reset baseurl in the old pkg
+                po.basepath = self.conf.baseurl
+                self.primaryfile.write(po.xml_dump_primary_metadata())
+                self.flfile.write(po.xml_dump_filelists_metadata())
+                self.otherfile.write(po.xml_dump_other_metadata(
+                    clog_limit=self.conf.changelog_limit))
+
         if pkgfiles:
             # divide that list by the number of workers and fork off that many
             # workers to tmpdirs
             # waitfor the workers to finish and as each one comes in
             # open the files they created and write them out to our metadata
             # add up the total pkg counts and return that value
-            worker_tmp_path = tempfile.mkdtemp()
-            worker_chunks = utils.split_list_into_equal_chunks(pkgfiles,  self.conf.workers)
+            self._worker_tmp_path = tempfile.mkdtemp() # setting this in the base object so we can clean it up later
+            if self.conf.workers < 1:
+                self.conf.workers = num_cpus_online()
+            pkgfiles.sort()
+            worker_chunks = split_list_into_equal_chunks(pkgfiles, self.conf.workers)
             worker_cmd_dict = {}
             worker_jobs = {}
             base_worker_cmdline = [self.conf.worker_cmd, 
@@ -617,7 +628,8 @@ class MetaDataGenerator:
                     '--pkgoptions=_collapse_libc_requires=%s' % self.conf.collapse_glibc_requires, 
                     '--pkgoptions=_cachedir=%s' % self.conf.cachedir,
                     '--pkgoptions=_baseurl=%s' % self.conf.baseurl,
-                    '--globalopts=clog_limit=%s' % self.conf.changelog_limit,]
+                    '--globalopts=clog_limit=%s' % self.conf.changelog_limit,
+                    '--globalopts=sumtype=%s' % self.conf.sumtype, ]
             
             if self.conf.quiet:
                 base_worker_cmdline.append('--quiet')
@@ -626,15 +638,14 @@ class MetaDataGenerator:
                 base_worker_cmdline.append('--verbose')
                 
             for worker_num in range(self.conf.workers):
-                # make the worker directory
+                pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num
+                f = open(pkl, 'w') 
+                f.write('\n'.join(worker_chunks[worker_num]))
+                f.close()
+                
                 workercmdline = []
                 workercmdline.extend(base_worker_cmdline)
-                thisdir = worker_tmp_path + '/' + str(worker_num)
-                if checkAndMakeDir(thisdir):
-                    workercmdline.append('--tmpmdpath=%s' % thisdir)
-                else:
-                    raise MDError, "Unable to create worker path: %s" % thisdir
-                workercmdline.extend(worker_chunks[worker_num])
+                workercmdline.append('--pkglist=%s/pkglist-%s' % (self._worker_tmp_path, worker_num))
                 worker_cmd_dict[worker_num] = workercmdline
             
                 
@@ -647,49 +658,60 @@ class MetaDataGenerator:
                                         stderr=subprocess.PIPE)
                 worker_jobs[num] = job
             
-            gimmebreak = 0
-            while gimmebreak != len(worker_jobs.keys()):
-                gimmebreak = 0
-                for (num,job) in worker_jobs.items():
-                    if job.poll() is not None:
-                        gimmebreak+=1
-                    line = job.stdout.readline()
-                    if line:
+            files = self.primaryfile, self.flfile, self.otherfile
+            def log_messages(num):
+                job = worker_jobs[num]
+                while True:
+                    # check stdout and stderr
+                    for stream in select((job.stdout, job.stderr), (), ())[0]:
+                        line = stream.readline()
+                        if line: break
+                    else:
+                        return # EOF, EOF
+                    if stream is job.stdout:
+                        if line.startswith('*** '):
+                            # get data, save to local files
+                            for out, size in zip(files, line[4:].split()):
+                                out.write(stream.read(int(size)))
+                            return
                         self.callback.log('Worker %s: %s' % (num, line.rstrip()))
-                    line = job.stderr.readline()
-                    if line:
+                    else:
                         self.callback.errorlog('Worker %s: %s' % (num, line.rstrip()))
+
+            for i, pkg in enumerate(pkgfiles):
+                # insert cached packages
+                save_keptpkgs(pkg)
+
+                # save output to local files
+                log_messages(i % self.conf.workers)
+
+            for (num, job) in worker_jobs.items():
+                # process remaining messages on stderr
+                log_messages(num)
+
+                if job.wait() != 0:
+                    msg = "Worker exited with non-zero value: %s. Fatal." % job.returncode
+                    self.callback.errorlog(msg)
+                    raise MDError, msg
                     
-                
             if not self.conf.quiet:
                 self.callback.log("Workers Finished")
-            # finished with workers
-            # go to their dirs and add the contents
-            if not self.conf.quiet:
-                self.callback.log("Gathering worker results")
-            for num in range(self.conf.workers):
-                for (fn, fo) in (('primary.xml', self.primaryfile), 
-                           ('filelists.xml', self.flfile),
-                           ('other.xml', self.otherfile)):
-                    fnpath = worker_tmp_path + '/' + str(num) + '/' + fn
-                    if os.path.exists(fnpath):
-                        fo.write(open(fnpath, 'r').read())
-
                     
             for pkgfile in pkgfiles:
                 if self.conf.deltas:
-                    po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
-                    self._do_delta_rpm_package(po)
+                    try:
+                        po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
+                        self._do_delta_rpm_package(po)
+                    except MDError, e:
+                        errorprint(e)
+                        continue
                 self.read_pkgs.append(pkgfile)

+        save_keptpkgs(None) # append anything left
         return self.current_pkg


     def closeMetadataDocs(self):
-        if not self.conf.quiet:
-            self.callback.log('')
-
-
         # save them up to the tmp locations:
         if not self.conf.quiet:
             self.callback.log(_('Saving Primary metadata'))
@@ -784,7 +806,6 @@ class MetaDataGenerator:
             return self._old_package_dict

         self._old_package_dict = {}
-        opl = []
         for d in self.conf.oldpackage_paths:
             for f in self.getFileList(d, '.rpm'):
                 fp = d + '/' + f
@@ -833,7 +854,7 @@ class MetaDataGenerator:
         return ' '.join(results)

     def _createRepoDataObject(self, mdfile, mdtype, compress=True, 
-                              compress_type='gzip', attribs={}):
+                              compress_type=None, attribs={}):
         """return random metadata as RepoData object to be  added to RepoMD
            mdfile = complete path to file
            mdtype = the metadata type to use
@@ -843,15 +864,13 @@ class MetaDataGenerator:
         sfile = os.path.basename(mdfile)
         fo = open(mdfile, 'r')
         outdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        if not compress_type:
+            compress_type = self.conf.compress_type
         if compress:
-            if compress_type == 'gzip':
-                sfile = '%s.gz' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = GzipFile(filename = outfn, mode='wb')
-            elif compress_type == 'bzip2':
-                sfile = '%s.bz2' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = BZ2File(filename = outfn, mode='wb')
+            sfile = '%s.%s' % (sfile, compress_type)
+            outfn = os.path.join(outdir, sfile)
+            output = compressOpen(outfn, mode='wb', compress_type=compress_type)
+                
         else:
             outfn  = os.path.join(outdir, sfile)
             output = open(outfn, 'w')
@@ -874,14 +893,13 @@ class MetaDataGenerator:

         thisdata = RepoData()
         thisdata.type = mdtype
-        baseloc = None
         thisdata.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, sfile))
         thisdata.checksum = (self.conf.sumtype, csum)
         if compress:
             thisdata.openchecksum  = (self.conf.sumtype, open_csum)
         
         thisdata.size = str(os.stat(outfn).st_size)
-        thisdata.timestamp = str(os.stat(outfn).st_mtime)
+        thisdata.timestamp = str(int(os.stat(outfn).st_mtime))
         for (k, v) in attribs.items():
             setattr(thisdata, k, str(v))
         
@@ -925,9 +943,14 @@ class MetaDataGenerator:
             rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None)

         for (rpm_file, ftype) in workfiles:
+            # when we fix y-m-p and non-gzipped xml files - then we can make this just add
+            # self.conf.compress_type
+            if ftype in ('other', 'filelists', 'primary'):
+                rpm_file = rpm_file + '.' + 'gz'
+            elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression:
+                rpm_file = rpm_file + '.' + self.conf.compress_type
             complete_path = os.path.join(repopath, rpm_file)
-
-            zfo = _gzipOpen(complete_path)
+            zfo = compressOpen(complete_path)
             # This is misc.checksum() done locally so we can get the size too.
             data = misc.Checksums([sumtype])
             while data.read(zfo, 2**16):
@@ -966,14 +989,20 @@ class MetaDataGenerator:
                     good_name = '%s.sqlite' % ftype
                     resultpath = os.path.join(repopath, good_name)

+                    # compat compression for rhel5 compatibility from fedora :(
+                    compress_type = self.conf.compress_type
+                    if self.compat_compress:
+                        compress_type = 'bz2'
+                        
                     # rename from silly name to not silly name
                     os.rename(tmp_result_path, resultpath)
-                    compressed_name = '%s.bz2' % good_name
+                    compressed_name = '%s.%s' % (good_name, compress_type)
                     result_compressed = os.path.join(repopath, compressed_name)
                     db_csums[ftype] = misc.checksum(sumtype, resultpath)

                     # compress the files
-                    bzipFile(resultpath, result_compressed)
+
+                    compressFile(resultpath, result_compressed, compress_type)
                     # csum the compressed file
                     db_compressed_sums[ftype] = misc.checksum(sumtype,
                                                              result_compressed)
@@ -983,8 +1012,8 @@ class MetaDataGenerator:
                     os.unlink(resultpath)

                     if self.conf.unique_md_filenames:
-                        csum_compressed_name = '%s-%s.bz2' % (
-                                           db_compressed_sums[ftype], good_name)
+                        csum_compressed_name = '%s-%s.%s' % (
+                                           db_compressed_sums[ftype], good_name, compress_type)
                         csum_result_compressed =  os.path.join(repopath,
                                                            csum_compressed_name)
                         os.rename(result_compressed, csum_result_compressed)
@@ -1001,7 +1030,7 @@ class MetaDataGenerator:
                     data.location = (self.conf.baseurl, 
                               os.path.join(self.conf.finaldir, compressed_name))
                     data.checksum = (sumtype, db_compressed_sums[ftype])
-                    data.timestamp = str(db_stat.st_mtime)
+                    data.timestamp = str(int(db_stat.st_mtime))
                     data.size = str(db_stat.st_size)
                     data.opensize = str(un_stat.st_size)
                     data.openchecksum = (sumtype, db_csums[ftype])
@@ -1020,7 +1049,13 @@ class MetaDataGenerator:
             data.openchecksum = (sumtype, uncsum)

             if self.conf.unique_md_filenames:
-                res_file = '%s-%s.xml.gz' % (csum, ftype)
+                if ftype in ('primary', 'filelists', 'other'):
+                    compress = 'gz'
+                else:
+                    compress = self.conf.compress_type
+                
+                main_name = '.'.join(rpm_file.split('.')[:-1])
+                res_file = '%s-%s.%s' % (csum, main_name, compress)
                 orig_file = os.path.join(repopath, rpm_file)
                 dest_file = os.path.join(repopath, res_file)
                 os.rename(orig_file, dest_file)
@@ -1046,7 +1081,7 @@ class MetaDataGenerator:
             

         if self.conf.additional_metadata:
-            for md_type, mdfile in self.conf.additional_metadata.items():
+            for md_type, md_file in self.conf.additional_metadata.items():
                 mdcontent = self._createRepoDataObject(md_file, md_type)
                 repomd.repoData[mdcontent.type] = mdcontent
                 
@@ -1110,23 +1145,43 @@ class MetaDataGenerator:
                     raise MDError, _(
                     'Could not remove old metadata file: %s: %s') % (oldfile, e)

-        # Move everything else back from olddir (eg. repoview files)
-        try:
-            old_contents = os.listdir(output_old_dir)
-        except (OSError, IOError), e:
-            old_contents = []
-            
+        old_to_remove = []
+        old_pr = []
+        old_fl = []
+        old_ot = []
+        old_pr_db = []
+        old_fl_db = []
+        old_ot_db = []
         for f in os.listdir(output_old_dir):
             oldfile = os.path.join(output_old_dir, f)
             finalfile = os.path.join(output_final_dir, f)
-            if f.find('-') != -1 and f.split('-')[1] in ('primary.sqlite.bz2',
-                    'filelists.sqlite.bz2', 'primary.xml.gz','other.sqlite.bz2',
-                    'other.xml.gz','filelists.xml.gz'):
-                os.remove(oldfile) # kill off the old ones
-                continue
-            if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2',
-                     'primary.sqlite.bz2'):
-                os.remove(oldfile)
+
+            for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr),
+                           ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl),
+                           ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)):
+                fn = '.'.join(f.split('.')[:-1])
+                if fn.endswith(end):
+                    lst.append(oldfile)
+                    break
+
+        # make a list of the old metadata files we don't want to remove.
+        for lst in (old_pr, old_fl, old_ot, old_pr_db, old_fl_db, old_ot_db):
+            sortlst = sorted(lst, key=lambda x: os.path.getmtime(x),
+                             reverse=True)
+            for thisf in sortlst[self.conf.retain_old_md:]:
+                old_to_remove.append(thisf)
+
+        for f in os.listdir(output_old_dir):
+            oldfile = os.path.join(output_old_dir, f)
+            finalfile = os.path.join(output_final_dir, f)
+            fn = '.'.join(f.split('.')[:-1])
+            if fn in ('filelists.sqlite', 'other.sqlite',
+                     'primary.sqlite') or oldfile in old_to_remove:
+                try:
+                    os.remove(oldfile)
+                except (OSError, IOError), e:
+                    raise MDError, _(
+                    'Could not remove old metadata file: %s: %s') % (oldfile, e)
                 continue

             if os.path.exists(finalfile):
@@ -1147,14 +1202,19 @@ class MetaDataGenerator:
                     msg += _('Error was %s') % e
                     raise MDError, msg

-        try:
-            os.rmdir(output_old_dir)
-        except OSError, e:
-            self.errorlog(_('Could not remove old metadata dir: %s')
-                          % self.conf.olddir)
-            self.errorlog(_('Error was %s') % e)
-            self.errorlog(_('Please clean up this directory manually.'))
+        self._cleanup_tmp_repodata_dir()
+        self._cleanup_update_tmp_dir()        
+        self._write_out_read_pkgs_list()
+

+    def _cleanup_update_tmp_dir(self):
+        if not self.conf.update:
+            return
+        
+        shutil.rmtree(self.oldData._repo.basecachedir, ignore_errors=True)
+        shutil.rmtree(self.oldData._repo.base_persistdir, ignore_errors=True)
+        
+    def _write_out_read_pkgs_list(self):
         # write out the read_pkgs_list file with self.read_pkgs
         if self.conf.read_pkgs_list:
             try:
@@ -1167,6 +1227,23 @@ class MetaDataGenerator:
                               % self.conf.read_pkgs_list)
                 self.errorlog(_('Error was %s') % e)

+    def _cleanup_tmp_repodata_dir(self):
+        output_old_dir = os.path.join(self.conf.outputdir, self.conf.olddir)
+        output_temp_dir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        for dirbase in (self.conf.olddir, self.conf.tempdir):
+            dirpath = os.path.join(self.conf.outputdir, dirbase)
+            if os.path.exists(dirpath):
+                try:
+                    os.rmdir(dirpath)
+                except OSError, e:
+                    self.errorlog(_('Could not remove temp metadata dir: %s')
+                                  % dirbase)
+                    self.errorlog(_('Error was %s') % e)
+                    self.errorlog(_('Please clean up this directory manually.'))
+        # our worker tmp path
+        if hasattr(self, '_worker_tmp_path') and os.path.exists(self._worker_tmp_path):
+            shutil.rmtree(self._worker_tmp_path, ignore_errors=True)
+        
     def setup_sqlite_dbs(self, initdb=True):
         """sets up the sqlite dbs w/table schemas and db_infos"""
         destdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
@@ -1194,24 +1271,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
         (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
         return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))

-    def getFileList(self, directory, ext):
-
-        extlen = len(ext)
-
-        def extension_visitor(arg, dirname, names):
-            for fn in names:
-                if os.path.isdir(fn):
-                    continue
-                elif fn[-extlen:].lower() == '%s' % (ext):
-                    reldir = os.path.basename(dirname)
-                    if reldir == os.path.basename(directory):
-                        reldir = ""
-                    arg.append(os.path.join(reldir, fn))
-
-        rpmlist = []
-        os.path.walk(directory, extension_visitor, rpmlist)
-        return rpmlist
-
     def doPkgMetadata(self):
         """all the heavy lifting for the package metadata"""
         if len(self.conf.directories) == 1:
@@ -1232,6 +1291,19 @@ class SplitMetaDataGenerator(MetaDataGenerator):
                     thisdir = os.path.join(self.conf.basedir, mydir)

             filematrix[mydir] = self.getFileList(thisdir, '.rpm')
+
+            #  pkglist is a bit different for split media, as we have to know
+            # which dir. it belongs to. So we walk the dir. and then filter.
+            # We could be faster by not walking the dir. ... but meh.
+            if self.conf.pkglist:
+                pkglist = set(self.conf.pkglist)
+                pkgs = []
+                for fname in filematrix[mydir]:
+                    if fname not in pkglist:
+                        continue
+                    pkgs.append(fname)
+                filematrix[mydir] = pkgs
+
             self.trimRpms(filematrix[mydir])
             self.pkgcount += len(filematrix[mydir])

@@ -1240,7 +1312,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
         self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
         try:
             self.openMetadataDocs()
-            original_basedir = self.conf.basedir
             for mydir in self.conf.directories:
                 self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
                 self.writeMetadataDocs(filematrix[mydir], mydir)
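
The worker rewrite in this file leans on an ordering invariant: pkgfiles is sorted, split_list_into_equal_chunks() deals it out round-robin, and the main loop polls worker (i % workers) for the i-th package, so output comes back in sorted order with cached packages interleaved by save_keptpkgs(). A standalone sketch of that invariant, using the chunker exactly as the utils.py hunk below defines it (the sample filenames are made up):

    def split_list_into_equal_chunks(seq, num_chunks):
        out = [[] for i in range(num_chunks)]
        for i, item in enumerate(seq):
            out[i % num_chunks].append(item)
        return out

    pkgs = sorted(['d.rpm', 'a.rpm', 'c.rpm', 'b.rpm', 'e.rpm'])
    chunks = split_list_into_equal_chunks(pkgs, 2)
    # reading chunk i % 2, item i // 2 reproduces the sorted order
    merged = [chunks[i % 2][i // 2] for i in range(len(pkgs))]
    assert merged == pkgs
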
diff --git a/createrepo/merge.py b/createrepo/merge.py
index b3b2ea1..1ac43bb 100644
--- a/createrepo/merge.py
+++ b/createrepo/merge.py
@@ -24,6 +24,7 @@ from yum.misc import unique, getCacheDir
 import yum.update_md
 import rpmUtils.arch
 import operator
+from utils import MDError
 import createrepo
 import tempfile

@@ -84,6 +85,8 @@ class RepoMergeBase:
         # in the repolist
         count = 0
         for r in self.repolist:
+            if r[0] == '/':
+                r = 'file://' + r # just fix the file repos, this is silly.
             count +=1
             rid = 'repo%s' % count
             n = self.yumbase.add_enable_repo(rid, baseurls=[r],
@@ -92,7 +95,10 @@ class RepoMergeBase:
             n._merge_rank = count

         #setup our sacks
-        self.yumbase._getSacks(archlist=self.archlist)
+        try:
+            self.yumbase._getSacks(archlist=self.archlist)
+        except yum.Errors.RepoError, e:
+            raise MDError, "Could not setup merge repo pkgsack: %s" % e

         myrepos = self.yumbase.repos.listEnabled()

@@ -102,11 +108,16 @@ class RepoMergeBase:
     def write_metadata(self, outputdir=None):
         mytempdir = tempfile.mkdtemp()
         if self.groups:
-            comps_fn = mytempdir + '/groups.xml'
-            compsfile = open(comps_fn, 'w')
-            compsfile.write(self.yumbase.comps.xml())
-            compsfile.close()
-            self.mdconf.groupfile=comps_fn
+            try:
+                comps_fn = mytempdir + '/groups.xml'
+                compsfile = open(comps_fn, 'w')
+                compsfile.write(self.yumbase.comps.xml())
+                compsfile.close()
+            except yum.Errors.GroupsError, e:
+                # groups not being available shouldn't be a fatal error
+                pass
+            else:
+                self.mdconf.groupfile=comps_fn

         if self.updateinfo:
             ui_fn = mytempdir + '/updateinfo.xml'
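
With the file:// fix above, RepoMergeBase now accepts plain local paths in its repolist. A hypothetical usage sketch (the paths are made up, and merge_repos()/write_metadata() are assumed to be the driver methods as used by upstream mergerepo.py):

    from createrepo.merge import RepoMergeBase

    merger = RepoMergeBase(['/srv/repo-a', '/srv/repo-b'])  # plain paths now OK
    merger.merge_repos()                  # raises MDError if the pkgsack fails
    merger.write_metadata(outputdir='/srv/merged')
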
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
index 27d3690..54863cb 100644
--- a/createrepo/readMetadata.py
+++ b/createrepo/readMetadata.py
@@ -16,11 +16,25 @@
 # Copyright 2006 Red Hat

 import os
-import libxml2
 import stat
 from utils import errorprint, _

-from yum import repoMDObject
+import yum
+from yum import misc
+from yum.Errors import YumBaseError
+import tempfile
+class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite):
+    # special for special people like us.
+    def _return_remote_location(self):
+
+        if self.basepath:
+            msg = """<location xml:base="%s" href="%s"/>\n""" % (
+                                     misc.to_xml(self.basepath, attrib=True),
+                                     misc.to_xml(self.relativepath, attrib=True))
+        else:
+            msg = """<location href="%s"/>\n""" % misc.to_xml(self.relativepath, attrib=True)
+
+        return msg  


 class MetadataIndex(object):
@@ -30,178 +44,72 @@ class MetadataIndex(object):
             opts = {}
         self.opts = opts
         self.outputdir = outputdir
+        realpath = os.path.realpath(outputdir)
         repodatadir = self.outputdir + '/repodata'
-        myrepomdxml = repodatadir + '/repomd.xml'
-        if os.path.exists(myrepomdxml):
-            repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
-            b = repomd.getData('primary').location[1]
-            f = repomd.getData('filelists').location[1]
-            o = repomd.getData('other').location[1]
-            basefile = os.path.join(self.outputdir, b)
-            filelistfile = os.path.join(self.outputdir, f)
-            otherfile = os.path.join(self.outputdir, o)
-        else:
-            basefile = filelistfile = otherfile = ""
-
-        self.files = {'base' : basefile,
-                      'filelist' : filelistfile,
-                      'other' : otherfile}
-        self.scan()
+        self._repo = yum.yumRepo.YumRepository('garbageid')
+        self._repo.baseurl = 'file://' + realpath
+        self._repo.basecachedir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo")
+        self._repo.base_persistdir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo-p")
+        self._repo.metadata_expire = 1
+        self._repo.gpgcheck = 0
+        self._repo.repo_gpgcheck = 0
+        self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld)
+        self.pkg_tups_by_path = {}
+        try:
+            self.scan()
+        except YumBaseError, e:
+            print "Could not find valid repo at: %s" % self.outputdir
+        

     def scan(self):
-        """Read in and index old repo data"""
-        self.basenodes = {}
-        self.filesnodes = {}
-        self.othernodes = {}
-        self.pkg_ids = {}
+        """Read in old repodata"""
         if self.opts.get('verbose'):
             print _("Scanning old repo data")
-        for fn in self.files.values():
-            if not os.path.exists(fn):
-                #cannot scan
-                errorprint(_("Warning: Old repodata file missing: %s") % fn)
-                return
-        root = libxml2.parseFile(self.files['base']).getRootElement()
-        self._scanPackageNodes(root, self._handleBase)
-        if self.opts.get('verbose'):
-            print _("Indexed %i base nodes" % len(self.basenodes))
-        root = libxml2.parseFile(self.files['filelist']).getRootElement()
-        self._scanPackageNodes(root, self._handleFiles)
-        if self.opts.get('verbose'):
-            print _("Indexed %i filelist nodes" % len(self.filesnodes))
-        root = libxml2.parseFile(self.files['other']).getRootElement()
-        self._scanPackageNodes(root, self._handleOther)
-        if self.opts.get('verbose'):
-            print _("Indexed %i other nodes" % len(self.othernodes))
-        #reverse index pkg ids to track references
-        self.pkgrefs = {}
-        for relpath, pkgid in self.pkg_ids.iteritems():
-            self.pkgrefs.setdefault(pkgid,[]).append(relpath)
-
-    def _scanPackageNodes(self, root, handler):
-        node = root.children
-        while node is not None:
-            if node.type != "element":
-                node = node.next
+        self._repo.sack.populate(self._repo, 'all', None, False)
+        for thispo in self._repo.sack:
+            mtime = thispo.filetime
+            size = thispo.size
+            relpath = thispo.relativepath
+            do_stat = self.opts.get('do_stat', True)
+            if mtime is None:
+                print _("mtime missing for %s") % relpath
                 continue
-            if node.name == "package":
-                handler(node)
-            node = node.next
-
-    def _handleBase(self, node):
-        top = node
-        node = node.children
-        pkgid = None
-        mtime = None
-        size = None
-        relpath = None
-        do_stat = self.opts.get('do_stat', True)
-        while node is not None:
-            if node.type != "element":
-                node = node.next
+            if size is None:
+                print _("size missing for %s") % relpath
                 continue
-            if node.name == "checksum":
-                pkgid = node.content
-            elif node.name == "time":
-                mtime = int(node.prop('file'))
-            elif node.name == "size":
-                size = int(node.prop('package'))
-            elif node.name == "location":
-                relpath = node.prop('href')
-            node = node.next
-        if relpath is None:
-            print _("Incomplete data for node")
-            return
-        if pkgid is None:
-            print _("pkgid missing for %s") % relpath
-            return
-        if mtime is None:
-            print _("mtime missing for %s") % relpath
-            return
-        if size is None:
-            print _("size missing for %s") % relpath
-            return
-        if do_stat:
-            filepath = os.path.join(self.opts['pkgdir'], relpath)
-            try:
-                st = os.stat(filepath)
-            except OSError:
-                #file missing -- ignore
-                return
-            if not stat.S_ISREG(st.st_mode):
-                #ignore non files
-                return
-            #check size and mtime
-            if st.st_size != size:
-                if self.opts.get('verbose'):
-                    print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
-                return
-            if int(st.st_mtime) != mtime:
-                if self.opts.get('verbose'):
-                    print _("Modification time changed for %s") % filepath
-                return
-        #otherwise we index
-        self.basenodes[relpath] = top
-        self.pkg_ids[relpath] = pkgid
-
-    def _handleFiles(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.filesnodes[pkgid] = node
-
-    def _handleOther(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.othernodes[pkgid] = node
+            if do_stat:
+                filepath = os.path.join(self.opts['pkgdir'], relpath)
+                try:
+                    st = os.stat(filepath)
+                except OSError:
+                    #file missing -- ignore
+                    continue
+                if not stat.S_ISREG(st.st_mode):
+                    #ignore non files
+                    continue
+                #check size and mtime
+                if st.st_size != size:
+                    if self.opts.get('verbose'):
+                        print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
+                    continue
+                if int(st.st_mtime) != mtime:
+                    if self.opts.get('verbose'):
+                        print _("Modification time changed for %s") % filepath
+                    continue
+
+            self.pkg_tups_by_path[relpath] = thispo.pkgtup
+

-    def getNodes(self, relpath):
-        """Return base, filelist, and other nodes for file, if they exist
-
-        Returns a tuple of nodes, or None if not found
+    def getNodes(self, relpath):
+        """return a package object based on relative path of pkg
         """
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            return None
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        fnode = self.filesnodes.get(pkgid,None)
-        if fnode is None:
-            return None
-        onode = self.othernodes.get(pkgid,None)
-        if onode is None:
-            return None
-        return bnode, fnode, onode
-
-    def freeNodes(self,relpath):
-        #causing problems
-        """Free up nodes corresponding to file, if possible"""
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            print "Missing node for %s" % relpath
-            return
-        bnode.unlinkNode()
-        bnode.freeNode()
-        del self.basenodes[relpath]
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        del self.pkg_ids[relpath]
-        dups = self.pkgrefs.get(pkgid)
-        dups.remove(relpath)
-        if len(dups):
-            #still referenced
-            return
-        del self.pkgrefs[pkgid]
-        for nodes in self.filesnodes, self.othernodes:
-            node = nodes.get(pkgid)
-            if node is not None:
-                node.unlinkNode()
-                node.freeNode()
-                del nodes[pkgid]
+        if relpath in self.pkg_tups_by_path:
+            pkgtup = self.pkg_tups_by_path[relpath]
+            return self._repo.sack.searchPkgTuple(pkgtup)[0]
+        return None

+    

 if __name__ == "__main__":
     cwd = os.getcwd()
@@ -209,9 +117,9 @@ if __name__ == "__main__":
             'pkgdir': cwd}

     idx = MetadataIndex(cwd, opts)
-    for fn in idx.basenodes.keys():
-        a,b,c, = idx.getNodes(fn)
-        a.serialize()
-        b.serialize()
-        c.serialize()
-        idx.freeNodes(fn)
+    for fn in idx.pkg_tups_by_path:
+        po = idx.getNodes(fn)
+        print po.xml_dump_primary_metadata()
+        print po.xml_dump_filelists_metadata()
+        print po.xml_dump_other_metadata()
+
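
The rewritten MetadataIndex replaces hand-rolled libxml2 parsing with a throwaway YumRepository pointed at the old repodata via file://. Note that getNodes() keeps its old name for compatibility but now returns a package object rather than a node tuple. A hypothetical usage sketch mirroring the __main__ block above (the repo and rpm paths are made up):

    from createrepo.readMetadata import MetadataIndex

    opts = {'verbose': True, 'pkgdir': '/srv/repo'}
    idx = MetadataIndex('/srv/repo', opts)
    po = idx.getNodes('pkgs/foo-1.0-1.noarch.rpm')  # relative path of the rpm
    if po is not None:
        print po.xml_dump_primary_metadata()
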
diff --git a/createrepo/utils.py b/createrepo/utils.py
index 995c3b9..b0d92ec 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -23,6 +23,12 @@ import bz2
 import gzip
 from gzip import write32u, FNAME
 from yum import misc
+_available_compression = ['gz', 'bz2']
+try:
+    import lzma
+    _available_compression.append('xz')
+except ImportError:
+    lzma = None

 def errorprint(stuff):
     print >> sys.stderr, stuff
@@ -34,22 +40,14 @@ def _(args):

 class GzipFile(gzip.GzipFile):
     def _write_gzip_header(self):
+        # Generate a header that is easily reproduced with gzip -9 -n on
+        # a Unix-like system
1193          self.fileobj.write('\037\213')             # magic header
1194          self.fileobj.write('\010')                 # compression method
1195 -        if hasattr(self, 'name'):
1196 -            fname = self.name[:-3]
1197 -        else:
1198 -            fname = self.filename[:-3]
1199 -        flags = 0
1200 -        if fname:
1201 -            flags = FNAME
1202 -        self.fileobj.write(chr(flags))
1203 -        write32u(self.fileobj, long(0))
1204 -        self.fileobj.write('\002')
1205 -        self.fileobj.write('\377')
1206 -        if fname:
1207 -            self.fileobj.write(fname + '\000')
1208 -
1209 +        self.fileobj.write('\000')                 # flags
1210 +        write32u(self.fileobj, long(0))            # timestamp
1211 +        self.fileobj.write('\002')                 # max compression
1212 +        self.fileobj.write('\003')                 # UNIX
1213  
1214  def _gzipOpen(filename, mode="rb", compresslevel=9):
1215      return GzipFile(filename, mode, compresslevel)
1216 @@ -69,6 +67,75 @@ def bzipFile(source, dest):
1217      s_fn.close()
1218  
1219  
1220 +def xzFile(source, dest):
1221 +    if not 'xz' in _available_compression:
1222 +        raise MDError, "Cannot use xz for compression, library/module is not available"
1223 +        
1224 +    s_fn = open(source, 'rb')
1225 +    destination = lzma.LZMAFile(dest, 'w')
1226 +
1227 +    while True:
1228 +        data = s_fn.read(1024000)
1229 +
1230 +        if not data: break
1231 +        destination.write(data)
1232 +
1233 +    destination.close()
1234 +    s_fn.close()
1235 +
1236 +def gzFile(source, dest):
1237 +        
1238 +    s_fn = open(source, 'rb')
1239 +    destination = GzipFile(dest, 'w')
1240 +
1241 +    while True:
1242 +        data = s_fn.read(1024000)
1243 +
1244 +        if not data: break
1245 +        destination.write(data)
1246 +
1247 +    destination.close()
1248 +    s_fn.close()
1249 +
1250 +
1251 +class Duck:
1252 +    def __init__(self, **attr):
1253 +        self.__dict__ = attr
1254 +
1255 +
1256 +def compressFile(source, dest, compress_type):
1257 +    """Compress the file at source into dest using the given compression type"""
1258 +    
1259 +    if compress_type == 'xz':
1260 +        xzFile(source, dest)
1261 +    elif compress_type == 'bz2':
1262 +        bzipFile(source, dest)
1263 +    elif compress_type == 'gz':
1264 +        gzFile(source, dest)
1265 +    else:
1266 +        raise MDError, "Unknown compression type %s" % compress_type
1267 +    
1268 +def compressOpen(fn, mode='rb', compress_type=None):
1269 +    
1270 +    if not compress_type:
1271 +        # read-only open without an explicit compress_type - guess from the file extension
1272 +        compress_type = fn.split('.')[-1]
1273 +        if compress_type not in _available_compression:
1274 +            compress_type = 'gz'
1275 +            
1276 +    if compress_type == 'xz':
1277 +        fh = lzma.LZMAFile(fn, mode)
1278 +        if mode == 'w':
1279 +            fh = Duck(write=lambda s, write=fh.write: s != '' and write(s),
1280 +                      close=fh.close)
1281 +        return fh
1282 +    elif compress_type == 'bz2':
1283 +        return bz2.BZ2File(fn, mode)
1284 +    elif compress_type == 'gz':
1285 +        return _gzipOpen(fn, mode)
1286 +    else:
1287 +        raise MDError, "Unknown compression type %s" % compress_type
1288 +    
1289  def returnFD(filename):
1290      try:
1291          fdno = os.open(filename, os.O_RDONLY)
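A hypothetical round trip through the two new helpers (file names are placeholders; the 'xz' branch needs the lzma module detected at import time above):

    from createrepo.utils import compressFile, compressOpen

    compressFile('primary.xml', 'primary.xml.xz', 'xz')
    fh = compressOpen('primary.xml.xz')   # no compress_type given: guessed from extension
    data = fh.read()
    fh.close()
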
1292 @@ -124,15 +191,28 @@ def encodefiletypelist(filetypelist):
1293      return result
1294  
1295  def split_list_into_equal_chunks(seq, num_chunks):
1296 -    avg = len(seq) / float(num_chunks)
1297 -    out = []
1298 -    last = 0.0
1299 -    while last < len(seq):
1300 -        out.append(seq[int(last):int(last + avg)])
1301 -        last += avg
1302 -
1303 +    """Round-robin items into num_chunks lists; used on sorted input which is then merged in order"""
1304 +    out = [[] for i in range(num_chunks)]
1305 +    for i, item in enumerate(seq):
1306 +        out[i % num_chunks].append(item)
1307      return out
1308  
1309 +def num_cpus_online(unknown=1):
1310 +    if not hasattr(os, "sysconf"):
1311 +        return unknown
1312 +
1313 +    if not os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
1314 +        return unknown
1315 +
1316 +    ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
1317 +    try:
1318 +        if int(ncpus) > 0:
1319 +            return ncpus
1320 +    except:
1321 +        pass
1322 +
1323 +    return unknown
1324 +
1325  
1326  class MDError(Exception):
1327      def __init__(self, value=None):
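The round-robin split matters because the input is sorted and the per-worker outputs are read back one package at a time from each worker in turn: handing item i to chunk i % num_chunks makes that interleaved read reproduce the original order, and chunk lengths differ by at most one (the old float-stepped slicing could emit empty chunks when num_chunks exceeded len(seq)). A doctest-style illustration, not part of the patch:

    >>> split_list_into_equal_chunks(range(7), 3)
    [[0, 3, 6], [1, 4], [2, 5]]
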
1328 diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
1329 index ac06196..f87ac6d 100644
1330 --- a/createrepo/yumbased.py
1331 +++ b/createrepo/yumbased.py
1332 @@ -16,6 +16,11 @@
1333  
1334  
1335  import os
1336 +def _get_umask():
1337 +   oumask = os.umask(0)
1338 +   os.umask(oumask)
1339 +   return oumask
1340 +_b4rpm_oumask = _get_umask()
1341  import rpm
1342  import types
1343  
1344 @@ -86,6 +91,9 @@ class CreateRepoPackage(YumLocalPackage):
1345                  csumo = os.fdopen(csumo, 'w', -1)
1346                  csumo.write(checksum)
1347                  csumo.close()
1348 +                #  tempfile forces 002 ... we want to undo that, so that users
1349 +                # can share the cache. BZ 833350.
1350 +                os.chmod(tmpfilename, 0666 ^ _b4rpm_oumask)
1351                  os.rename(tmpfilename, csumfile)
1352              except:
1353                  pass
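os.umask() can only be read by writing it, hence the set-and-restore dance done once at import time; the cached mask is then used to widen the 0600 mode the tempfile gets. A minimal sketch of the idea - note that for the usual umasks whose bits fall inside 0666 (e.g. 002, 022), the XOR used above and a plain AND-NOT agree:

    import os

    def get_umask():
        oumask = os.umask(0)   # os.umask() returns the previous mask...
        os.umask(oumask)       # ...so set a throwaway value, then restore it
        return oumask

    for um in (0002, 0022):
        assert 0666 ^ um == 0666 & ~um
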
1354 diff --git a/docs/createrepo.8 b/docs/createrepo.8
1355 index e3c4c3b..ff359de 100644
1356 --- a/docs/createrepo.8
1357 +++ b/docs/createrepo.8
1358 @@ -53,7 +53,8 @@ gullible).
1359  Don't generate repo metadata, if their timestamps are newer than its rpms.
1360  This option decreases the processing time drastically again, if you happen
1361  to run it on an unmodified repo, but it is (currently) mutual exclusive
1362 -with the --split option.
1363 +with the --split option. NOTE: This option will not notice when
1364 +packages have been removed from the repo. Use --update to handle that.
1365  .br
1366  .IP "\fB\--split\fP"
1367  Run in split media mode. Rather than pass a single directory, take a set of
1368 @@ -104,7 +105,16 @@ Tells createrepo to generate deltarpms and the delta metadata
1369  paths to look for older pkgs to delta against. Can be specified multiple times
1370  .IP "\fB\--num-deltas\fP int"
1371  the number of older versions to make deltas against. Defaults to 1
1372 -
1373 +.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST"
1374 +output the paths to the pkgs actually read; useful with --update
1375 +.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE"
1376 +max size of an rpm to run deltarpm against (in bytes)
1377 +.IP "\fB\--workers\fP WORKERS"
1378 +number of workers to spawn to read rpms
1379 +.IP "\fB\--compress-type\fP"
1380 +specify which compression method to use: compat (default),
1381 +xz (may not be available), gz, bz2.
1382 +.IP
1383  
1384  .SH "EXAMPLES"
1385  Here is an example of a repository with a groups file. Note that the
1386 diff --git a/genpkgmetadata.py b/genpkgmetadata.py
1387 index 8c98191..c46e441 100755
1388 --- a/genpkgmetadata.py
1389 +++ b/genpkgmetadata.py
1390 @@ -37,6 +37,12 @@ def parse_args(args, conf):
1391         Sanity check all the things being passed in.
1392      """
1393  
1394 +    def_workers = os.nice(0)
1395 +    if def_workers > 0:
1396 +        def_workers = 1 # We are niced, so just use a single worker.
1397 +    else:
1398 +        def_workers = 0 # 0 = autodetect (zoooom....)
1399 +
1400      _def   = yum.misc._default_checksums[0]
1401      _avail = yum.misc._available_checksums
1402      parser = OptionParser(version = "createrepo %s" % createrepo.__version__)
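os.nice(increment) returns the resulting niceness, so os.nice(0) reads the current value without changing it: a createrepo run started under nice(1) then defaults to a single worker instead of saturating the machine. Roughly:

    import os

    if os.nice(0) > 0:
        workers = 1   # we were niced: stay out of everyone's way
    else:
        workers = 0   # 0 appears to mean autodetect (cf. num_cpus_online above)
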
1403 @@ -100,6 +106,8 @@ def parse_args(args, conf):
1404      parser.add_option("--simple-md-filenames", dest="simple_md_filenames",
1405          help="do not include the file's checksum in the filename, helps with proxies",
1406          default=False, action="store_true")
1407 +    parser.add_option("--retain-old-md", default=0, type='int', dest='retain_old_md',
1408 +        help="keep around the latest (by timestamp) N copies of the old repodata")
1409      parser.add_option("--distro", default=[], action="append",
1410          help="distro tag and optional cpeid: --distro" "'cpeid,textname'")
1411      parser.add_option("--content", default=[], dest='content_tags',
1412 @@ -119,10 +127,15 @@ def parse_args(args, conf):
1413      parser.add_option("--max-delta-rpm-size", default=100000000,
1414          dest='max_delta_rpm_size', type='int',
1415          help="max size of an rpm that to run deltarpm against (in bytes)")
1416 -
1417 -    parser.add_option("--workers", default=1,
1418 +    parser.add_option("--workers", default=def_workers,
1419          dest='workers', type='int',
1420          help="number of workers to spawn to read rpms")
1421 +    parser.add_option("--xz", default=False,
1422 +        action="store_true",
1423 +        help="use xz for repodata compression")
1424 +    parser.add_option("--compress-type", default='compat', dest="compress_type",
1425 +        help="which compression type to use")
1426 +        
1427      
1428      (opts, argsleft) = parser.parse_args(args)
1429      if len(argsleft) > 1 and not opts.split:
1430 @@ -138,6 +151,9 @@ def parse_args(args, conf):
1431      else:
1432          directories = argsleft
1433  
1434 +    if opts.workers >= 128:
1435 +        errorprint(_('Warning: More than 128 workers is a lot. Limiting.'))
1436 +        opts.workers = 128
1437      if opts.sumtype == 'sha1':
1438          errorprint(_('Warning: It is more compatible to use sha instead of sha1'))
1439  
1440 @@ -155,6 +171,11 @@ def parse_args(args, conf):
1441      
1442      if opts.nodatabase:
1443          opts.database = False
1444 +    
1445 +    # xz is just a shorthand for compress_type
1446 +    if opts.xz and opts.compress_type == 'compat':
1447 +        opts.compress_type='xz'
1448 +        
1449          
1450      # let's switch over to using the conf object - put all the opts into it
1451      for opt in parser.option_list:
1452 @@ -240,6 +261,7 @@ def main(args):
1453              if mdgen.checkTimeStamps():
1454                  if mdgen.conf.verbose:
1455                      print _('repo is up to date')
1456 +                mdgen._cleanup_tmp_repodata_dir()
1457                  sys.exit(0)
1458  
1459          if conf.profile:
1460 diff --git a/mergerepo.py b/mergerepo.py
1461 index 05e5f5e..80cb1a8 100755
1462 --- a/mergerepo.py
1463 +++ b/mergerepo.py
1464 @@ -18,6 +18,7 @@
1465  
1466  import sys
1467  import createrepo.merge
1468 +from createrepo.utils import MDError
1469  from optparse import OptionParser
1470  
1471  #TODO:
1472 @@ -47,6 +48,9 @@ def parse_args(args):
1473                        help="Do not merge group(comps) metadata")
1474      parser.add_option("", "--noupdateinfo", default=False, action="store_true",
1475                        help="Do not merge updateinfo metadata")
1476 +    parser.add_option("--compress-type", default=None, dest="compress_type",
1477 +                      help="which compression type to use")
1478 +                      
1479      (opts, argsleft) = parser.parse_args(args)
1480  
1481      if len(opts.repos) < 2:
1482 @@ -77,9 +81,14 @@ def main(args):
1483          rmbase.groups = False
1484      if opts.noupdateinfo:
1485          rmbase.updateinfo = False
1486 -
1487 -    rmbase.merge_repos()
1488 -    rmbase.write_metadata()
1489 -
1490 +    if opts.compress_type:
1491 +        rmbase.mdconf.compress_type = opts.compress_type
1492 +    try:
1493 +        rmbase.merge_repos()
1494 +        rmbase.write_metadata()
1495 +    except MDError, e:
1496 +        print >> sys.stderr, "Could not merge repos: %s" % e
1497 +        sys.exit(1)
1498 +        
1499  if __name__ == "__main__":
1500      main(sys.argv[1:])
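The same flow is available programmatically; a rough Python equivalent of what mergerepo.py now does (repo paths are placeholders, and the RepoMergeBase constructor arguments are assumed from its use here):

    import sys
    import createrepo.merge
    from createrepo.utils import MDError

    rmbase = createrepo.merge.RepoMergeBase(['repo1/', 'repo2/'])
    rmbase.mdconf.compress_type = 'xz'   # the same knob --compress-type sets
    try:
        rmbase.merge_repos()
        rmbase.write_metadata()
    except MDError, e:
        print >> sys.stderr, "Could not merge repos: %s" % e
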
1501 diff --git a/modifyrepo.py b/modifyrepo.py
1502 index 17094a4..bf1eec0 100755
1503 --- a/modifyrepo.py
1504 +++ b/modifyrepo.py
1505 @@ -1,11 +1,15 @@
1506  #!/usr/bin/python
1507 -# This tools is used to insert arbitrary metadata into an RPM repository.
1508 +# This tool is used to manipulate arbitrary metadata in an RPM repository.
1509  # Example:
1510  #           ./modifyrepo.py updateinfo.xml myrepo/repodata
1511 +#           or
1512 +#           ./modifyrepo.py --remove updateinfo.xml myrepo/repodata
1513  # or in Python:
1514  #           >>> from modifyrepo import RepoMetadata
1515  #           >>> repomd = RepoMetadata('myrepo/repodata')
1516  #           >>> repomd.add('updateinfo.xml')
1517 +#           or
1518 +#           >>> repomd.remove('updateinfo.xml')
1519  #
1520  # This program is free software; you can redistribute it and/or modify
1521  # it under the terms of the GNU General Public License as published by
1522 @@ -20,11 +24,13 @@
1523  # (C) Copyright 2006  Red Hat, Inc.
1524  # Luke Macken <lmacken@redhat.com>
1525  # modified by Seth Vidal 2008
1526 +# modified by Daniel Mach 2011
1527  
1528  import os
1529  import sys
1530  from createrepo import __version__
1531 -from createrepo.utils import checksum_and_rename, GzipFile, MDError
1532 +from createrepo.utils import checksum_and_rename, compressOpen, MDError
1533 +from createrepo.utils import _available_compression
1534  from yum.misc import checksum
1535  
1536  from yum.repoMDObject import RepoMD, RepoMDError, RepoData
1537 @@ -39,6 +45,8 @@ class RepoMetadata:
1538          self.repodir = os.path.abspath(repo)
1539          self.repomdxml = os.path.join(self.repodir, 'repomd.xml')
1540          self.checksum_type = 'sha256'
1541 +        self.compress = False
1542 +        self.compress_type = _available_compression[-1] # best available
1543  
1544          if not os.path.exists(self.repomdxml):
1545              raise MDError, '%s not found' % self.repomdxml
1546 @@ -49,6 +57,35 @@ class RepoMetadata:
1547          except RepoMDError, e:
1548              raise MDError, 'Could not parse %s' % self.repomdxml
1549  
1550 +    def _get_mdtype(self, mdname, mdtype=None):
1551 +        """ Get the mdtype from an explicit mdtype or derive it from mdname. """
1552 +        if mdtype:
1553 +            return mdtype
1554 +        return mdname.split('.')[0]
1555 +
1556 +    def _print_repodata(self, repodata):
1557 +        """ Print repodata details. """
1558 +        print "           type =", repodata.type
1559 +        print "       location =", repodata.location[1]
1560 +        print "       checksum =", repodata.checksum[1]
1561 +        print "      timestamp =", repodata.timestamp
1562 +        print "  open-checksum =", repodata.openchecksum[1]
1563 +
1564 +    def _write_repomd(self):
1565 +        """ Write the updated repomd.xml. """
1566 +        outmd = file(self.repomdxml, 'w')
1567 +        outmd.write(self.repoobj.dump_xml())
1568 +        outmd.close()
1569 +        print "Wrote:", self.repomdxml
1570 +
1571 +    def _remove_repodata_file(self, repodata):
1572 +        """ Remove the file referenced by a repodata location. """
1573 +        try:
1574 +            os.remove(repodata.location[1])
1575 +        except OSError, ex:
1576 +            # a missing file is fine; anything else is fatal
1577 +            if ex.errno != 2:
1578 +                raise MDError("could not remove file %s" % repodata.location[1])
1579  
1580      def add(self, metadata, mdtype=None):
1581          """ Insert arbitrary metadata into this repository.
1582 @@ -63,8 +100,8 @@ class RepoMetadata:
1583              mdname = 'updateinfo.xml'
1584          elif isinstance(metadata, str):
1585              if os.path.exists(metadata):
1586 -                if metadata.endswith('.gz'):
1587 -                    oldmd = GzipFile(filename=metadata, mode='rb')
1588 +                if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'):
1589 +                    oldmd = compressOpen(metadata, mode='rb')
1590                  else:
1591                      oldmd = file(metadata, 'r')
1592                  md = oldmd.read()
1593 @@ -75,14 +112,19 @@ class RepoMetadata:
1594          else:
1595              raise MDError, 'invalid metadata type'
1596  
1597 +        do_compress = False
1598          ## Compress the metadata and move it into the repodata
1599 -        if not mdname.endswith('.gz'):
1600 -            mdname += '.gz'
1601 -        if not mdtype:
1602 -            mdtype = mdname.split('.')[0]
1603 -            
1604 +        if self.compress or not mdname.split('.')[-1] in ('gz', 'bz2', 'xz'):
1605 +            do_compress = True
1606 +            mdname += '.' + self.compress_type
1607 +        mdtype = self._get_mdtype(mdname, mdtype)
1608 +
1609          destmd = os.path.join(self.repodir, mdname)
1610 -        newmd = GzipFile(filename=destmd, mode='wb')
1611 +        if do_compress:
1612 +            newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type)
1613 +        else:
1614 +            newmd = open(destmd, 'wb')
1615 +            
1616          newmd.write(md)
1617          newmd.close()
1618          print "Wrote:", destmd
1619 @@ -91,11 +133,8 @@ class RepoMetadata:
1620          csum, destmd = checksum_and_rename(destmd, self.checksum_type)
1621          base_destmd = os.path.basename(destmd)
1622  
1623 -
1624 -        ## Remove any stale metadata
1625 -        if mdtype in self.repoobj.repoData:
1626 -            del self.repoobj.repoData[mdtype]
1627 -            
1628 +        # Remove any stale metadata
1629 +        old_rd = self.repoobj.repoData.pop(mdtype, None)
1630  
1631          new_rd = RepoData()
1632          new_rd.type = mdtype
1633 @@ -105,18 +144,28 @@ class RepoMetadata:
1634          new_rd.size = str(os.stat(destmd).st_size)
1635          new_rd.timestamp = str(os.stat(destmd).st_mtime)
1636          self.repoobj.repoData[new_rd.type] = new_rd
1637 -        
1638 -        print "           type =", new_rd.type
1639 -        print "       location =", new_rd.location[1]
1640 -        print "       checksum =", new_rd.checksum[1]
1641 -        print "      timestamp =", new_rd.timestamp
1642 -        print "  open-checksum =", new_rd.openchecksum[1]
1643 -
1644 -        ## Write the updated repomd.xml
1645 -        outmd = file(self.repomdxml, 'w')
1646 -        outmd.write(self.repoobj.dump_xml())
1647 -        outmd.close()
1648 -        print "Wrote:", self.repomdxml
1649 +        self._print_repodata(new_rd)
1650 +        self._write_repomd()
1651 +
1652 +        if old_rd is not None and old_rd.location[1] != new_rd.location[1]:
1653 +            # remove the old file when overwriting metadata
1654 +            # with the same mdtype but different location
1655 +            self._remove_repodata_file(old_rd)
1656 +
1657 +    def remove(self, metadata, mdtype=None):
1658 +        """ Remove metadata from this repository. """
1659 +        mdname = metadata
1660 +        mdtype = self._get_mdtype(mdname, mdtype)
1661 +
1662 +        old_rd = self.repoobj.repoData.pop(mdtype, None)
1663 +        if old_rd is None:
1664 +            print "Metadata not found: %s" % mdtype
1665 +            return
1666 +
1667 +        self._remove_repodata_file(old_rd)
1668 +        print "Removed:"
1669 +        self._print_repodata(old_rd)
1670 +        self._write_repomd()
1671  
1672  
1673  def main(args):
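Together with add(), the new remove() gives modifyrepo a symmetric Python API; a hypothetical session (paths are placeholders):

    >>> from modifyrepo import RepoMetadata
    >>> repomd = RepoMetadata('myrepo/repodata')
    >>> repomd.compress_type = 'xz'
    >>> repomd.add('updateinfo.xml')      # writes updateinfo.xml.xz, updates repomd.xml
    >>> repomd.remove('updateinfo.xml')   # drops the 'updateinfo' entry and its file
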
1674 @@ -124,7 +173,13 @@ def main(args):
1675      # query options
1676      parser.add_option("--mdtype", dest='mdtype',
1677                        help="specific datatype of the metadata, will be derived from the filename if not specified")
1678 -    parser.usage = "modifyrepo [options] <input_metadata> <output repodata>"
1679 +    parser.add_option("--remove", action="store_true",
1680 +                      help="remove specified file from repodata")
1681 +    parser.add_option("--compress", action="store_true", default=False,
1682 +                      help="compress the new repodata before adding it to the repo")
1683 +    parser.add_option("--compress-type", dest='compress_type', default='gz',
1684 +                      help="compression format to use")
1685 +    parser.usage = "modifyrepo [options] [--remove] <input_metadata> <output repodata>"
1686      
1687      (opts, argsleft) = parser.parse_args(args)
1688      if len(argsleft) != 2:
1689 @@ -137,11 +192,28 @@ def main(args):
1690      except MDError, e:
1691          print "Could not access repository: %s" % str(e)
1692          return 1
1693 +
1694 +
1695 +    repomd.compress = opts.compress
1696 +    if opts.compress_type in _available_compression:
1697 +        repomd.compress_type = opts.compress_type
1698 +
1699 +    # remove
1700 +    if opts.remove:
1701 +        try:
1702 +            repomd.remove(metadata)
1703 +        except MDError, ex:
1704 +            print "Could not remove metadata %s: %s" % (metadata, str(ex))
1705 +            return 1
1706 +        return
1707 +
1708 +    # add
1709      try:
1710          repomd.add(metadata, mdtype=opts.mdtype)
1711      except MDError, e:
1712          print "Could not add metadata from file %s: %s" % (metadata, str(e))
1713          return 1
1714 +    
1715  
1716  if __name__ == '__main__':
1717      ret = main(sys.argv[1:])
1718 diff --git a/worker.py b/worker.py
1719 index eb35ef7..fe6758f 100755
1720 --- a/worker.py
1721 +++ b/worker.py
1722 @@ -5,6 +5,7 @@ import yum
1723  import createrepo
1724  import os
1725  import rpmUtils
1726 +import re
1727  from optparse import OptionParser
1728  
1729  
1730 @@ -23,6 +24,8 @@ def main(args):
1731      parser = OptionParser()
1732      parser.add_option('--tmpmdpath', default=None, 
1733                  help="path where the outputs should be dumped for this worker")
1734 +    parser.add_option('--pkglist', default=None, 
1735 +                help="file to read the pkglist from, in lieu of listing them all on the cli")
1736      parser.add_option("--pkgoptions", default=[], action='append',
1737                  help="pkgoptions in the format of key=value")
1738      parser.add_option("--quiet", default=False, action='store_true',
1739 @@ -36,10 +39,6 @@ def main(args):
1740      opts, pkgs = parser.parse_args(args)
1741      external_data = {'_packagenumber': 1}
1742      globalopts = {}
1743 -    if not opts.tmpmdpath:
1744 -        print >> sys.stderr, "tmpmdpath required for destination files"
1745 -        sys.exit(1)
1746 -    
1747      
1748      for strs in opts.pkgoptions:
1749          k,v = strs.split('=')
1750 @@ -64,15 +63,34 @@ def main(args):
1751      
1752      reldir = external_data['_reldir']
1753      ts = rpmUtils.transaction.initReadOnlyTransaction()
1754 -    pri = open(opts.tmpmdpath + '/primary.xml' , 'w')
1755 -    fl = open(opts.tmpmdpath  + '/filelists.xml' , 'w')
1756 -    other = open(opts.tmpmdpath  + '/other.xml' , 'w')
1757 -    
1758 -    
1759 +    if opts.tmpmdpath:
1760 +        files = [open(opts.tmpmdpath + '/%s.xml' % i, 'w')
1761 +                 for i in ('primary', 'filelists', 'other')]
1762 +        def output(*xml):
1763 +            for fh, buf in zip(files, xml):
1764 +                fh.write(buf)
1765 +    else:
1766 +        def output(*xml):
1767 +            buf = ' '.join(str(len(i)) for i in xml)
1768 +            sys.stdout.write('*** %s\n' % buf)
1769 +            for buf in xml:
1770 +                sys.stdout.write(buf)
1771 +
1772 +    if opts.pkglist:
1773 +        for line in open(opts.pkglist,'r').readlines():
1774 +            line = line.strip()
1775 +            if re.match('^\s*\#.*', line) or re.match('^\s*$', line):
1776 +                continue
1777 +            pkgs.append(line)
1778 +
1779 +    clog_limit = globalopts.get('clog_limit', None)
1780 +    if clog_limit is not None:
1781 +        clog_limit = int(clog_limit)
1782      for pkgfile in pkgs:
1783          pkgpath = reldir + '/' + pkgfile
1784          if not os.path.exists(pkgpath):
1785              print >> sys.stderr, "File not found: %s" % pkgpath
1786 +            output()
1787              continue
1788  
1789          try:
1790 @@ -80,20 +98,17 @@ def main(args):
1791                  print "reading %s" % (pkgfile)
1792  
1793              pkg = createrepo.yumbased.CreateRepoPackage(ts, package=pkgpath, 
1794 -                                                        external_data=external_data)
1795 -            pri.write(pkg.xml_dump_primary_metadata())
1796 -            fl.write(pkg.xml_dump_filelists_metadata())
1797 -            other.write(pkg.xml_dump_other_metadata(clog_limit=
1798 -                                            globalopts.get('clog_limit', None)))
1799 +                                sumtype=globalopts.get('sumtype', None), 
1800 +                                external_data=external_data)
1801 +            output(pkg.xml_dump_primary_metadata(),
1802 +                   pkg.xml_dump_filelists_metadata(),
1803 +                   pkg.xml_dump_other_metadata(clog_limit=clog_limit))
1804          except yum.Errors.YumBaseError, e:
1805              print >> sys.stderr, "Error: %s" % e
1806 +            output()
1807              continue
1808          else:
1809              external_data['_packagenumber']+=1
1810          
1811 -    pri.close()
1812 -    fl.close()
1813 -    other.close()
1814 -    
1815  if __name__ == "__main__":
1816      main(sys.argv[1:])
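When --tmpmdpath is omitted, the worker now streams all three documents over stdout, each record preceded by a '*** <len> <len> <len>' size line (a bare '***' line marks a package that could not be read). A hypothetical reader on the parent side - the real consumer lives in createrepo itself, and the worker arguments here are only illustrative:

    import subprocess

    proc = subprocess.Popen(['./worker.py', '--quiet',
                             '--pkgoptions', 'reldir=.', 'foo.rpm'],
                            stdout=subprocess.PIPE)
    line = proc.stdout.readline()
    while line:
        if line.startswith('***'):
            sizes = [int(n) for n in line.split()[1:]]
            # primary, filelists, other xml - in that order, sizes[i] bytes each
            bufs = [proc.stdout.read(n) for n in sizes]
        line = proc.stdout.readline()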