createrepo-head.patch: update code from git master, add bash-completion subpackage

diff --git a/createrepo.bash b/createrepo.bash
index 54ac8b2..f5a8bb7 100644
--- a/createrepo.bash
+++ b/createrepo.bash
@@ -1,11 +1,17 @@
 # bash completion for createrepo and friends

+_cr_compress_type()
+{
+    COMPREPLY=( $( compgen -W "$( ${1:-createrepo} --compress-type=FOO / 2>&1 \
+        | sed -ne 's/,/ /g' -ne 's/.*[Cc]ompression.*://p' )" -- "$2" ) )
+}
+
 _cr_createrepo()
 {
     COMPREPLY=()

     case $3 in
-        --version|-h|--help|-u|--baseurl|--distro|--content|--repo|--workers|\
+        --version|-h|--help|-u|--baseurl|--distro|--content|--repo|\
         --revision|-x|--excludes|--changelog-limit|--max-delta-rpm-size)
             return 0
             ;;
@@ -30,10 +36,24 @@ _cr_createrepo()
             COMPREPLY=( $( compgen -f -o plusdirs -X '!*.rpm' -- "$2" ) )
             return 0
             ;;
+        --retain-old-md)
+            COMPREPLY=( $( compgen -W '0 1 2 3 4 5 6 7 8 9' -- "$2" ) )
+            return 0
+            ;;
         --num-deltas)
             COMPREPLY=( $( compgen -W '1 2 3 4 5 6 7 8 9' -- "$2" ) )
             return 0
             ;;
+        --workers)
+            local min=2 max=$( getconf _NPROCESSORS_ONLN 2>/dev/null )
+            [[ -z $max || $max -lt $min ]] && max=$min
+            COMPREPLY=( $( compgen -W "{1..$max}" -- "$2" ) )
+            return 0
+            ;;
+        --compress-type)
+            _cr_compress_type "$1" "$2"
+            return 0
+            ;;
     esac

     if [[ $2 == -* ]] ; then
@@ -42,9 +62,9 @@ _cr_createrepo()
             --cachedir --checkts --no-database --update --update-md-path
             --skip-stat --split --pkglist --includepkg --outputdir
             --skip-symlinks --changelog-limit --unique-md-filenames
-            --simple-md-filenames --distro --content --repo --revision --deltas
-            --oldpackagedirs --num-deltas --read-pkgs-list
-            --max-delta-rpm-size --workers' -- "$2" ) )
+            --simple-md-filenames --retain-old-md --distro --content --repo
+            --revision --deltas --oldpackagedirs --num-deltas --read-pkgs-list
+            --max-delta-rpm-size --workers --compress-type' -- "$2" ) )
     else
         COMPREPLY=( $( compgen -d -- "$2" ) )
     fi
@@ -63,10 +83,14 @@ _cr_mergerepo()
             COMPREPLY=( $( compgen -d -- "$2" ) )
             return 0
             ;;
+        --compress-type)
+            _cr_compress_type "" "$2"
+            return 0
+            ;;
     esac

     COMPREPLY=( $( compgen -W '--version --help --repo --archlist --no-database
-        --outputdir --nogroups --noupdateinfo' -- "$2" ) )
+        --outputdir --nogroups --noupdateinfo --compress-type' -- "$2" ) )
 } &&
 complete -F _cr_mergerepo -o filenames mergerepo mergerepo.py

@@ -78,17 +102,22 @@ _cr_modifyrepo()
         --version|-h|--help|--mdtype)
             return 0
             ;;
+        --compress-type)
+            _cr_compress_type "" "$2"
+            return 0
+            ;;
     esac

     if [[ $2 == -* ]] ; then
-        COMPREPLY=( $( compgen -W '--version --help --mdtype' -- "$2" ) )
+        COMPREPLY=( $( compgen -W '--version --help --mdtype --remove
+            --compress --compress-type' -- "$2" ) )
         return 0
     fi

     local i argnum=1
     for (( i=1; i < ${#COMP_WORDS[@]}-1; i++ )) ; do
         if [[ ${COMP_WORDS[i]} != -* &&
-                    ${COMP_WORDS[i-1]} != @(=|--mdtype) ]]; then
+              ${COMP_WORDS[i-1]} != @(=|--@(md|compress-)type) ]]; then
             argnum=$(( argnum+1 ))
         fi
     done
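
The _cr_compress_type helper above discovers the supported compression types at completion time by passing a deliberately bogus --compress-type value and scraping the error message. A minimal Python sketch of the same probing trick (a hypothetical helper, not part of the patch; it assumes createrepo is on $PATH and that the error text lists the valid types after a colon, matching the "Compression %s not available: Please choose from: %s" message added in __init__.py below):

    # Emulate the bash completion's discovery trick in Python 2.
    import subprocess

    def probe_compress_types(prog='createrepo'):
        p = subprocess.Popen([prog, '--compress-type=FOO', '/'],
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = p.communicate()[0]
        for line in out.splitlines():
            # e.g. "Compression FOO not available: Please choose from: gz, bz2, xz"
            if 'ompression' in line and ':' in line:
                return [t.strip() for t in line.rsplit(':', 1)[1].split(',')]
        return []
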
diff --git a/createrepo.spec b/createrepo.spec
index 1e491cd..eea7092 100644
--- a/createrepo.spec
+++ b/createrepo.spec
@@ -11,7 +11,7 @@ URL: http://createrepo.baseurl.org/
 BuildRoot: %{_tmppath}/%{name}-%{version}root
 BuildArchitectures: noarch
 Requires: python >= 2.1, rpm-python, rpm >= 0:4.1.1, libxml2-python
-Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm
+Requires: yum-metadata-parser, yum >= 3.2.29, python-deltarpm, pyliblzma

 %description
 This utility will generate a common metadata repository from a directory of
@@ -43,6 +43,9 @@ make DESTDIR=$RPM_BUILD_ROOT sysconfdir=%{_sysconfdir} install
 %{python_sitelib}/createrepo

 %changelog
+* Fri Sep  9 2011 Seth Vidal <skvidal at fedoraproject.org>
+- add lzma dep
+
 * Wed Jan 26 2011 Seth Vidal <skvidal at fedoraproject.org>
 - bump to 0.9.9
 - add worker.py
diff --git a/createrepo/__init__.py b/createrepo/__init__.py
index 8f2538e..1b18a9f 100644
--- a/createrepo/__init__.py
+++ b/createrepo/__init__.py
@@ -26,15 +26,16 @@ import tempfile
 import stat
 import fcntl
 import subprocess
+from select import select

-from yum import misc, Errors, to_unicode
-from yum.repoMDObject import RepoMD, RepoMDError, RepoData
+from yum import misc, Errors
+from yum.repoMDObject import RepoMD, RepoData
 from yum.sqlutils import executeSQL
 from yum.packageSack import MetaSack
-from yum.packages import YumAvailablePackage, YumLocalPackage
+from yum.packages import YumAvailablePackage

 import rpmUtils.transaction
-from utils import _, errorprint, MDError
+from utils import _, errorprint, MDError, lzma, _available_compression
 import readMetadata
 try:
     import sqlite3 as sqlite
@@ -46,8 +47,9 @@ try:
 except ImportError:
     pass

-from utils import _gzipOpen, bzipFile, checkAndMakeDir, GzipFile, \
+from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \
                   checksum_and_rename, split_list_into_equal_chunks
+from utils import num_cpus_online
 import deltarpms

 __version__ = '0.9.9'
@@ -74,7 +76,7 @@ class MetaDataConfig(object):
         self.deltadir = None
         self.delta_relative = 'drpms/'
         self.oldpackage_paths = [] # where to look for the old packages -
-        self.deltafile = 'prestodelta.xml.gz'
+        self.deltafile = 'prestodelta.xml'
         self.num_deltas = 1 # number of older versions to delta (max)
         self.max_delta_rpm_size = 100000000
         self.update_md_path = None
@@ -86,9 +88,9 @@ class MetaDataConfig(object):
         self.skip_symlinks = False
         self.pkglist = []
         self.database_only = False
-        self.primaryfile = 'primary.xml.gz'
-        self.filelistsfile = 'filelists.xml.gz'
-        self.otherfile = 'other.xml.gz'
+        self.primaryfile = 'primary.xml'
+        self.filelistsfile = 'filelists.xml'
+        self.otherfile = 'other.xml'
         self.repomdfile = 'repomd.xml'
         self.tempdir = '.repodata'
         self.finaldir = 'repodata'
@@ -108,8 +110,10 @@ class MetaDataConfig(object):
         self.collapse_glibc_requires = True
         self.workers = 1 # number of workers to fork off to grab metadata from the pkgs
         self.worker_cmd = '/usr/share/createrepo/worker.py'
-        
         #self.worker_cmd = './worker.py' # helpful when testing
+        self.retain_old_md = 0
+        self.compress_type = 'compat'
+

 class SimpleMDCallBack(object):
     def errorlog(self, thing):
@@ -141,10 +145,23 @@ class MetaDataGenerator:
         self.files = []
         self.rpmlib_reqs = {}
         self.read_pkgs = []
+        self.compat_compress = False

         if not self.conf.directory and not self.conf.directories:
             raise MDError, "No directory given on which to run."
-
+        
+        if self.conf.compress_type == 'compat':
+            self.compat_compress = True
+            self.conf.compress_type = None
+            
+        if not self.conf.compress_type:
+            self.conf.compress_type = 'gz'
+        
+        if self.conf.compress_type not in utils._available_compression:
+            raise MDError, "Compression %s not available: Please choose from: %s" \
+                 % (self.conf.compress_type, ', '.join(utils._available_compression))
+            
+            
         if not self.conf.directories: # just makes things easier later
             self.conf.directories = [self.conf.directory]
         if not self.conf.directory: # ensure we have both in the config object
@@ -290,14 +307,13 @@ class MetaDataGenerator:

         def extension_visitor(filelist, dirname, names):
             for fn in names:
+                fn = os.path.join(dirname, fn)
                 if os.path.isdir(fn):
                     continue
                 if self.conf.skip_symlinks and os.path.islink(fn):
                     continue
                 elif fn[-extlen:].lower() == '%s' % (ext):
-                    relativepath = dirname.replace(startdir, "", 1)
-                    relativepath = relativepath.lstrip("/")
-                    filelist.append(os.path.join(relativepath, fn))
+                    filelist.append(fn[len(startdir):])

         filelist = []
         startdir = directory + '/'
@@ -311,7 +327,7 @@ class MetaDataGenerator:
     def checkTimeStamps(self):
         """check the timestamp of our target dir. If it is not newer than
            the repodata return False, else True"""
-        if self.conf.checkts:
+        if self.conf.checkts and self.conf.mdtimestamp:
             dn = os.path.join(self.conf.basedir, self.conf.directory)
             files = self.getFileList(dn, '.rpm')
             files = self.trimRpms(files)
@@ -410,9 +426,11 @@ class MetaDataGenerator:

     def _setupPrimary(self):
         # setup the primary metadata file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed
+        fpz = self.conf.primaryfile + '.' + 'gz'
         primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                       self.conf.primaryfile)
-        fo = _gzipOpen(primaryfilepath, 'w')
+                                       fpz)
+        fo = compressOpen(primaryfilepath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \
             ' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' %
@@ -421,9 +439,11 @@ class MetaDataGenerator:

     def _setupFilelists(self):
         # setup the filelist file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed        
+        fpz = self.conf.filelistsfile + '.' + 'gz'
         filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                    self.conf.filelistsfile)
-        fo = _gzipOpen(filelistpath, 'w')
+                                    fpz)
+        fo = compressOpen(filelistpath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \
                  ' packages="%s">' % self.pkgcount)
@@ -431,9 +451,11 @@ class MetaDataGenerator:

     def _setupOther(self):
         # setup the other file
+        # FIXME - make this be  conf.compress_type once y-m-p is fixed        
+        fpz = self.conf.otherfile + '.' + 'gz'
         otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.otherfile)
-        fo = _gzipOpen(otherfilepath, 'w')
+                                     fpz)
+        fo = compressOpen(otherfilepath, 'w', 'gz')
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \
                  ' packages="%s">' %
@@ -442,9 +464,10 @@ class MetaDataGenerator:

     def _setupDelta(self):
         # setup the other file
+        fpz = self.conf.deltafile + '.' + self.conf.compress_type        
         deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
-                                     self.conf.deltafile)
-        fo = _gzipOpen(deltafilepath, 'w')
+                                     fpz)
+        fo = compressOpen(deltafilepath, 'w', self.conf.compress_type)
         fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
         fo.write('<prestodelta>\n')
         return fo
@@ -520,6 +543,7 @@ class MetaDataGenerator:
         # go on their merry way
         
         newpkgs = []
+        keptpkgs = []
         if self.conf.update:
             # if we're in --update mode then only act on the new/changed pkgs
             for pkg in pkglist:
@@ -530,39 +554,13 @@ class MetaDataGenerator:
                 old_pkg = pkg
                 if pkg.find("://") != -1:
                     old_pkg = os.path.basename(pkg)
-                nodes = self.oldData.getNodes(old_pkg)
-                if nodes is not None: # we have a match in the old metadata
+                old_po = self.oldData.getNodes(old_pkg)
+                if old_po: # we have a match in the old metadata
                     if self.conf.verbose:
                         self.callback.log(_("Using data from old metadata for %s")
                                             % pkg)
-                    (primarynode, filenode, othernode) = nodes
-
-                    for node, outfile in ((primarynode, self.primaryfile),
-                                          (filenode, self.flfile),
-                                          (othernode, self.otherfile)):
-                        if node is None:
-                            break
-
-                        if self.conf.baseurl:
-                            anode = node.children
-                            while anode is not None:
-                                if anode.type != "element":
-                                    anode = anode.next
-                                    continue
-                                if anode.name == "location":
-                                    anode.setProp('xml:base', self.conf.baseurl)
-                                anode = anode.next
-
-                        output = node.serialize('UTF-8', self.conf.pretty)
-                        if output:
-                            outfile.write(output)
-                        else:
-                            if self.conf.verbose:
-                                self.callback.log(_("empty serialize on write to" \
-                                                    "%s in %s") % (outfile, pkg))
-                        outfile.write('\n')
-
-                    self.oldData.freeNodes(pkg)
+                    keptpkgs.append((pkg, old_po))
+
                     #FIXME - if we're in update and we have deltas enabled
                     # check the presto data for this pkg and write its info back out
                     # to our deltafile
@@ -584,32 +582,45 @@ class MetaDataGenerator:
             po = None
             if isinstance(pkg, YumAvailablePackage):
                 po = pkg
-                self.read_pkgs.append(po.localpath)
+                self.read_pkgs.append(po.localPkg())

             # if we're dealing with remote pkgs - pitch it over to doing
             # them one at a time, for now. 
             elif pkg.find('://') != -1:
-                po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
+                po = self.read_in_package(pkg, pkgpath=pkgpath, reldir=reldir)
                 self.read_pkgs.append(pkg)
             
             if po:
-                self.primaryfile.write(po.xml_dump_primary_metadata())
-                self.flfile.write(po.xml_dump_filelists_metadata())
-                self.otherfile.write(po.xml_dump_other_metadata(
-                                     clog_limit=self.conf.changelog_limit))
+                keptpkgs.append((pkg, po))
                 continue
                 
             pkgfiles.append(pkg)
-            
-       
+
+        keptpkgs.sort(reverse=True)
+        # keptpkgs is a list of (filename, po); pkgfiles is a list of filenames.
+        # Need to write them in sorted(filename) order.  We loop over pkgfiles,
+        # inserting keptpkgs in right spots (using the upto argument).
+        def save_keptpkgs(upto):
+            while keptpkgs and (upto is None or keptpkgs[-1][0] < upto):
+                filename, po = keptpkgs.pop()
+                # reset baseurl in the old pkg
+                po.basepath = self.conf.baseurl
+                self.primaryfile.write(po.xml_dump_primary_metadata())
+                self.flfile.write(po.xml_dump_filelists_metadata())
+                self.otherfile.write(po.xml_dump_other_metadata(
+                    clog_limit=self.conf.changelog_limit))
+
         if pkgfiles:
             # divide that list by the number of workers and fork off that many
             # workers to tmpdirs
             # waitfor the workers to finish and as each one comes in
             # open the files they created and write them out to our metadata
             # add up the total pkg counts and return that value
-            worker_tmp_path = tempfile.mkdtemp()
-            worker_chunks = utils.split_list_into_equal_chunks(pkgfiles,  self.conf.workers)
+            self._worker_tmp_path = tempfile.mkdtemp() # setting this in the base object so we can clean it up later
+            if self.conf.workers < 1:
+                self.conf.workers = num_cpus_online()
+            pkgfiles.sort()
+            worker_chunks = split_list_into_equal_chunks(pkgfiles, self.conf.workers)
             worker_cmd_dict = {}
             worker_jobs = {}
             base_worker_cmdline = [self.conf.worker_cmd, 
@@ -617,7 +628,8 @@ class MetaDataGenerator:
                     '--pkgoptions=_collapse_libc_requires=%s' % self.conf.collapse_glibc_requires, 
                     '--pkgoptions=_cachedir=%s' % self.conf.cachedir,
                     '--pkgoptions=_baseurl=%s' % self.conf.baseurl,
-                    '--globalopts=clog_limit=%s' % self.conf.changelog_limit,]
+                    '--globalopts=clog_limit=%s' % self.conf.changelog_limit,
+                    '--globalopts=sumtype=%s' % self.conf.sumtype, ]
             
             if self.conf.quiet:
                 base_worker_cmdline.append('--quiet')
@@ -626,15 +638,14 @@ class MetaDataGenerator:
                 base_worker_cmdline.append('--verbose')
                 
             for worker_num in range(self.conf.workers):
-                # make the worker directory
+                pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num
+                f = open(pkl, 'w') 
+                f.write('\n'.join(worker_chunks[worker_num]))
+                f.close()
+                
                 workercmdline = []
                 workercmdline.extend(base_worker_cmdline)
-                thisdir = worker_tmp_path + '/' + str(worker_num)
-                if checkAndMakeDir(thisdir):
-                    workercmdline.append('--tmpmdpath=%s' % thisdir)
-                else:
-                    raise MDError, "Unable to create worker path: %s" % thisdir
-                workercmdline.extend(worker_chunks[worker_num])
+                workercmdline.append('--pkglist=%s/pkglist-%s' % (self._worker_tmp_path, worker_num))
                 worker_cmd_dict[worker_num] = workercmdline
             
                 
@@ -647,49 +658,60 @@ class MetaDataGenerator:
                                         stderr=subprocess.PIPE)
                 worker_jobs[num] = job
             
-            gimmebreak = 0
-            while gimmebreak != len(worker_jobs.keys()):
-                gimmebreak = 0
-                for (num,job) in worker_jobs.items():
-                    if job.poll() is not None:
-                        gimmebreak+=1
-                    line = job.stdout.readline()
-                    if line:
+            files = self.primaryfile, self.flfile, self.otherfile
+            def log_messages(num):
+                job = worker_jobs[num]
+                while True:
+                    # check stdout and stderr
+                    for stream in select((job.stdout, job.stderr), (), ())[0]:
+                        line = stream.readline()
+                        if line: break
+                    else:
+                        return # EOF, EOF
+                    if stream is job.stdout:
+                        if line.startswith('*** '):
+                            # get data, save to local files
+                            for out, size in zip(files, line[4:].split()):
+                                out.write(stream.read(int(size)))
+                            return
                         self.callback.log('Worker %s: %s' % (num, line.rstrip()))
-                    line = job.stderr.readline()
-                    if line:
+                    else:
                         self.callback.errorlog('Worker %s: %s' % (num, line.rstrip()))
+
+            for i, pkg in enumerate(pkgfiles):
+                # insert cached packages
+                save_keptpkgs(pkg)
+
+                # save output to local files
+                log_messages(i % self.conf.workers)
+
+            for (num, job) in worker_jobs.items():
+                # process remaining messages on stderr
+                log_messages(num)
+
+                if job.wait() != 0:
+                    msg = "Worker exited with non-zero value: %s. Fatal." % job.returncode
+                    self.callback.errorlog(msg)
+                    raise MDError, msg
                     
-                
             if not self.conf.quiet:
                 self.callback.log("Workers Finished")
-            # finished with workers
-            # go to their dirs and add the contents
-            if not self.conf.quiet:
-                self.callback.log("Gathering worker results")
-            for num in range(self.conf.workers):
-                for (fn, fo) in (('primary.xml', self.primaryfile), 
-                           ('filelists.xml', self.flfile),
-                           ('other.xml', self.otherfile)):
-                    fnpath = worker_tmp_path + '/' + str(num) + '/' + fn
-                    if os.path.exists(fnpath):
-                        fo.write(open(fnpath, 'r').read())
-
                     
             for pkgfile in pkgfiles:
                 if self.conf.deltas:
-                    po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
-                    self._do_delta_rpm_package(po)
+                    try:
+                        po = self.read_in_package(pkgfile, pkgpath=pkgpath, reldir=reldir)
+                        self._do_delta_rpm_package(po)
+                    except MDError, e:
+                        errorprint(e)
+                        continue
                 self.read_pkgs.append(pkgfile)

+        save_keptpkgs(None) # append anything left
         return self.current_pkg


     def closeMetadataDocs(self):
-        if not self.conf.quiet:
-            self.callback.log('')
-
-
         # save them up to the tmp locations:
         if not self.conf.quiet:
             self.callback.log(_('Saving Primary metadata'))
@@ -784,7 +806,6 @@ class MetaDataGenerator:
             return self._old_package_dict

         self._old_package_dict = {}
-        opl = []
         for d in self.conf.oldpackage_paths:
             for f in self.getFileList(d, '.rpm'):
                 fp = d + '/' + f
@@ -833,7 +854,7 @@ class MetaDataGenerator:
         return ' '.join(results)

     def _createRepoDataObject(self, mdfile, mdtype, compress=True, 
-                              compress_type='gzip', attribs={}):
+                              compress_type=None, attribs={}):
         """return random metadata as RepoData object to be  added to RepoMD
            mdfile = complete path to file
            mdtype = the metadata type to use
@@ -843,15 +864,13 @@ class MetaDataGenerator:
         sfile = os.path.basename(mdfile)
         fo = open(mdfile, 'r')
         outdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        if not compress_type:
+            compress_type = self.conf.compress_type
         if compress:
-            if compress_type == 'gzip':
-                sfile = '%s.gz' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = GzipFile(filename = outfn, mode='wb')
-            elif compress_type == 'bzip2':
-                sfile = '%s.bz2' % sfile
-                outfn = os.path.join(outdir, sfile)
-                output = BZ2File(filename = outfn, mode='wb')
+            sfile = '%s.%s' % (sfile, compress_type)
+            outfn = os.path.join(outdir, sfile)
+            output = compressOpen(outfn, mode='wb', compress_type=compress_type)
+                
         else:
             outfn  = os.path.join(outdir, sfile)
             output = open(outfn, 'w')
@@ -874,14 +893,13 @@ class MetaDataGenerator:

         thisdata = RepoData()
         thisdata.type = mdtype
-        baseloc = None
         thisdata.location = (self.conf.baseurl, os.path.join(self.conf.finaldir, sfile))
         thisdata.checksum = (self.conf.sumtype, csum)
         if compress:
             thisdata.openchecksum  = (self.conf.sumtype, open_csum)
         
         thisdata.size = str(os.stat(outfn).st_size)
-        thisdata.timestamp = str(os.stat(outfn).st_mtime)
+        thisdata.timestamp = str(int(os.stat(outfn).st_mtime))
         for (k, v) in attribs.items():
             setattr(thisdata, k, str(v))
         
@@ -925,9 +943,14 @@ class MetaDataGenerator:
             rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None)

         for (rpm_file, ftype) in workfiles:
+            # when we fix y-m-p and non-gzipped xml files - then we can make this just add
+            # self.conf.compress_type
+            if ftype in ('other', 'filelists', 'primary'):
+                rpm_file = rpm_file + '.' + 'gz'
+            elif rpm_file.find('.') != -1 and rpm_file.split('.')[-1] not in _available_compression:
+                rpm_file = rpm_file + '.' + self.conf.compress_type
             complete_path = os.path.join(repopath, rpm_file)
-
-            zfo = _gzipOpen(complete_path)
+            zfo = compressOpen(complete_path)
             # This is misc.checksum() done locally so we can get the size too.
             data = misc.Checksums([sumtype])
             while data.read(zfo, 2**16):
@@ -966,14 +989,20 @@ class MetaDataGenerator:
                     good_name = '%s.sqlite' % ftype
                     resultpath = os.path.join(repopath, good_name)

+                    # compat compression for rhel5 compatibility from fedora :(
+                    compress_type = self.conf.compress_type
+                    if self.compat_compress:
+                        compress_type = 'bz2'
+                        
                     # rename from silly name to not silly name
                     os.rename(tmp_result_path, resultpath)
-                    compressed_name = '%s.bz2' % good_name
+                    compressed_name = '%s.%s' % (good_name, compress_type)
                     result_compressed = os.path.join(repopath, compressed_name)
                     db_csums[ftype] = misc.checksum(sumtype, resultpath)

                     # compress the files
-                    bzipFile(resultpath, result_compressed)
+
+                    compressFile(resultpath, result_compressed, compress_type)
                     # csum the compressed file
                     db_compressed_sums[ftype] = misc.checksum(sumtype,
                                                              result_compressed)
@@ -983,8 +1012,8 @@ class MetaDataGenerator:
                     os.unlink(resultpath)

                     if self.conf.unique_md_filenames:
-                        csum_compressed_name = '%s-%s.bz2' % (
-                                           db_compressed_sums[ftype], good_name)
+                        csum_compressed_name = '%s-%s.%s' % (
+                                           db_compressed_sums[ftype], good_name, compress_type)
                         csum_result_compressed =  os.path.join(repopath,
                                                            csum_compressed_name)
                         os.rename(result_compressed, csum_result_compressed)
@@ -1001,7 +1030,7 @@ class MetaDataGenerator:
                     data.location = (self.conf.baseurl, 
                               os.path.join(self.conf.finaldir, compressed_name))
                     data.checksum = (sumtype, db_compressed_sums[ftype])
-                    data.timestamp = str(db_stat.st_mtime)
+                    data.timestamp = str(int(db_stat.st_mtime))
                     data.size = str(db_stat.st_size)
                     data.opensize = str(un_stat.st_size)
                     data.openchecksum = (sumtype, db_csums[ftype])
@@ -1020,7 +1049,13 @@ class MetaDataGenerator:
             data.openchecksum = (sumtype, uncsum)

             if self.conf.unique_md_filenames:
-                res_file = '%s-%s.xml.gz' % (csum, ftype)
+                if ftype in ('primary', 'filelists', 'other'):
+                    compress = 'gz'
+                else:
+                    compress = self.conf.compress_type
+                
+                main_name = '.'.join(rpm_file.split('.')[:-1])
+                res_file = '%s-%s.%s' % (csum, main_name, compress)
                 orig_file = os.path.join(repopath, rpm_file)
                 dest_file = os.path.join(repopath, res_file)
                 os.rename(orig_file, dest_file)
@@ -1046,7 +1081,7 @@ class MetaDataGenerator:
             

         if self.conf.additional_metadata:
-            for md_type, mdfile in self.conf.additional_metadata.items():
+            for md_type, md_file in self.conf.additional_metadata.items():
                 mdcontent = self._createRepoDataObject(md_file, md_type)
                 repomd.repoData[mdcontent.type] = mdcontent
                 
@@ -1110,23 +1145,43 @@ class MetaDataGenerator:
                     raise MDError, _(
                     'Could not remove old metadata file: %s: %s') % (oldfile, e)

-        # Move everything else back from olddir (eg. repoview files)
-        try:
-            old_contents = os.listdir(output_old_dir)
-        except (OSError, IOError), e:
-            old_contents = []
-            
+        old_to_remove = []
+        old_pr = []
+        old_fl = []
+        old_ot = []
+        old_pr_db = []
+        old_fl_db = []
+        old_ot_db = []
         for f in os.listdir(output_old_dir):
             oldfile = os.path.join(output_old_dir, f)
             finalfile = os.path.join(output_final_dir, f)
-            if f.find('-') != -1 and f.split('-')[1] in ('primary.sqlite.bz2',
-                    'filelists.sqlite.bz2', 'primary.xml.gz','other.sqlite.bz2',
-                    'other.xml.gz','filelists.xml.gz'):
-                os.remove(oldfile) # kill off the old ones
-                continue
-            if f in ('filelists.sqlite.bz2', 'other.sqlite.bz2',
-                     'primary.sqlite.bz2'):
-                os.remove(oldfile)
+
+            for (end,lst) in (('-primary.sqlite', old_pr_db), ('-primary.xml', old_pr),
+                           ('-filelists.sqlite', old_fl_db), ('-filelists.xml', old_fl),
+                           ('-other.sqlite', old_ot_db), ('-other.xml', old_ot)):
+                fn = '.'.join(f.split('.')[:-1])
+                if fn.endswith(end):
+                    lst.append(oldfile)
+                    break
+
+        # make a list of the old metadata files we don't want to remove.
+        for lst in (old_pr, old_fl, old_ot, old_pr_db, old_fl_db, old_ot_db):
+            sortlst = sorted(lst, key=lambda x: os.path.getmtime(x),
+                             reverse=True)
+            for thisf in sortlst[self.conf.retain_old_md:]:
+                old_to_remove.append(thisf)
+
+        for f in os.listdir(output_old_dir):
+            oldfile = os.path.join(output_old_dir, f)
+            finalfile = os.path.join(output_final_dir, f)
+            fn = '.'.join(f.split('.')[:-1])
+            if fn in ('filelists.sqlite', 'other.sqlite',
+                     'primary.sqlite') or oldfile in old_to_remove:
+                try:
+                    os.remove(oldfile)
+                except (OSError, IOError), e:
+                    raise MDError, _(
+                    'Could not remove old metadata file: %s: %s') % (oldfile, e)
                 continue

             if os.path.exists(finalfile):
@@ -1147,14 +1202,19 @@ class MetaDataGenerator:
                     msg += _('Error was %s') % e
                     raise MDError, msg

-        try:
-            os.rmdir(output_old_dir)
-        except OSError, e:
-            self.errorlog(_('Could not remove old metadata dir: %s')
-                          % self.conf.olddir)
-            self.errorlog(_('Error was %s') % e)
-            self.errorlog(_('Please clean up this directory manually.'))
+        self._cleanup_tmp_repodata_dir()
+        self._cleanup_update_tmp_dir()        
+        self._write_out_read_pkgs_list()
+

+    def _cleanup_update_tmp_dir(self):
+        if not self.conf.update:
+            return
+        
+        shutil.rmtree(self.oldData._repo.basecachedir, ignore_errors=True)
+        shutil.rmtree(self.oldData._repo.base_persistdir, ignore_errors=True)
+        
+    def _write_out_read_pkgs_list(self):
         # write out the read_pkgs_list file with self.read_pkgs
         if self.conf.read_pkgs_list:
             try:
@@ -1167,6 +1227,23 @@ class MetaDataGenerator:
                               % self.conf.read_pkgs_list)
                 self.errorlog(_('Error was %s') % e)

+    def _cleanup_tmp_repodata_dir(self):
+        output_old_dir = os.path.join(self.conf.outputdir, self.conf.olddir)
+        output_temp_dir = os.path.join(self.conf.outputdir, self.conf.tempdir)
+        for dirbase in (self.conf.olddir, self.conf.tempdir):
+            dirpath = os.path.join(self.conf.outputdir, dirbase)
+            if os.path.exists(dirpath):
+                try:
+                    os.rmdir(dirpath)
+                except OSError, e:
+                    self.errorlog(_('Could not remove temp metadata dir: %s')
+                                  % dirbase)
+                    self.errorlog(_('Error was %s') % e)
+                    self.errorlog(_('Please clean up this directory manually.'))
+        # our worker tmp path
+        if hasattr(self, '_worker_tmp_path') and os.path.exists(self._worker_tmp_path):
+            shutil.rmtree(self._worker_tmp_path, ignore_errors=True)
+        
     def setup_sqlite_dbs(self, initdb=True):
         """sets up the sqlite dbs w/table schemas and db_infos"""
         destdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
@@ -1194,24 +1271,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
         (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)
         return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment)))

-    def getFileList(self, directory, ext):
-
-        extlen = len(ext)
-
-        def extension_visitor(arg, dirname, names):
-            for fn in names:
-                if os.path.isdir(fn):
-                    continue
-                elif fn[-extlen:].lower() == '%s' % (ext):
-                    reldir = os.path.basename(dirname)
-                    if reldir == os.path.basename(directory):
-                        reldir = ""
-                    arg.append(os.path.join(reldir, fn))
-
-        rpmlist = []
-        os.path.walk(directory, extension_visitor, rpmlist)
-        return rpmlist
-
     def doPkgMetadata(self):
         """all the heavy lifting for the package metadata"""
         if len(self.conf.directories) == 1:
@@ -1232,6 +1291,19 @@ class SplitMetaDataGenerator(MetaDataGenerator):
                     thisdir = os.path.join(self.conf.basedir, mydir)

             filematrix[mydir] = self.getFileList(thisdir, '.rpm')
+
+            #  pkglist is a bit different for split media, as we have to know
+            # which dir. it belongs to. So we walk the dir. and then filter.
+            # We could be faster by not walking the dir. ... but meh.
+            if self.conf.pkglist:
+                pkglist = set(self.conf.pkglist)
+                pkgs = []
+                for fname in filematrix[mydir]:
+                    if fname not in pkglist:
+                        continue
+                    pkgs.append(fname)
+                filematrix[mydir] = pkgs
+
             self.trimRpms(filematrix[mydir])
             self.pkgcount += len(filematrix[mydir])

@@ -1240,7 +1312,6 @@ class SplitMetaDataGenerator(MetaDataGenerator):
         self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
         try:
             self.openMetadataDocs()
-            original_basedir = self.conf.basedir
             for mydir in self.conf.directories:
                 self.conf.baseurl = self._getFragmentUrl(self.conf.baseurl, mediano)
                 self.writeMetadataDocs(filematrix[mydir], mydir)
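
The worker rewrite in this file leans on an ordering invariant: pkgfiles is sorted, split_list_into_equal_chunks() deals it out round-robin, and the main loop polls worker (i % workers) for the i-th package, so output comes back in sorted order with cached packages interleaved by save_keptpkgs(). A standalone sketch of that invariant, using the chunker exactly as the utils.py hunk below defines it (the sample filenames are made up):

    def split_list_into_equal_chunks(seq, num_chunks):
        out = [[] for i in range(num_chunks)]
        for i, item in enumerate(seq):
            out[i % num_chunks].append(item)
        return out

    pkgs = sorted(['d.rpm', 'a.rpm', 'c.rpm', 'b.rpm', 'e.rpm'])
    chunks = split_list_into_equal_chunks(pkgs, 2)
    # reading chunk i % 2, item i // 2 reproduces the sorted order
    merged = [chunks[i % 2][i // 2] for i in range(len(pkgs))]
    assert merged == pkgs
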
diff --git a/createrepo/merge.py b/createrepo/merge.py
index b3b2ea1..1ac43bb 100644
--- a/createrepo/merge.py
+++ b/createrepo/merge.py
@@ -24,6 +24,7 @@ from yum.misc import unique, getCacheDir
 import yum.update_md
 import rpmUtils.arch
 import operator
+from utils import MDError
 import createrepo
 import tempfile

@@ -84,6 +85,8 @@ class RepoMergeBase:
         # in the repolist
         count = 0
         for r in self.repolist:
+            if r[0] == '/':
+                r = 'file://' + r # just fix the file repos, this is silly.
             count +=1
             rid = 'repo%s' % count
             n = self.yumbase.add_enable_repo(rid, baseurls=[r],
@@ -92,7 +95,10 @@ class RepoMergeBase:
             n._merge_rank = count

         #setup our sacks
-        self.yumbase._getSacks(archlist=self.archlist)
+        try:
+            self.yumbase._getSacks(archlist=self.archlist)
+        except yum.Errors.RepoError, e:
+            raise MDError, "Could not setup merge repo pkgsack: %s" % e

         myrepos = self.yumbase.repos.listEnabled()

@@ -102,11 +108,16 @@ class RepoMergeBase:
     def write_metadata(self, outputdir=None):
         mytempdir = tempfile.mkdtemp()
         if self.groups:
-            comps_fn = mytempdir + '/groups.xml'
-            compsfile = open(comps_fn, 'w')
-            compsfile.write(self.yumbase.comps.xml())
-            compsfile.close()
-            self.mdconf.groupfile=comps_fn
+            try:
+                comps_fn = mytempdir + '/groups.xml'
+                compsfile = open(comps_fn, 'w')
+                compsfile.write(self.yumbase.comps.xml())
+                compsfile.close()
+            except yum.Errors.GroupsError, e:
+                # groups not being available shouldn't be a fatal error
+                pass
+            else:
+                self.mdconf.groupfile=comps_fn

         if self.updateinfo:
             ui_fn = mytempdir + '/updateinfo.xml'
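
With the file:// fix above, RepoMergeBase now accepts plain local paths in its repolist. A hypothetical usage sketch (the paths are made up, and merge_repos()/write_metadata() are assumed to be the driver methods as used by upstream mergerepo.py):

    from createrepo.merge import RepoMergeBase

    merger = RepoMergeBase(['/srv/repo-a', '/srv/repo-b'])  # plain paths now OK
    merger.merge_repos()                  # raises MDError if the pkgsack fails
    merger.write_metadata(outputdir='/srv/merged')
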
diff --git a/createrepo/readMetadata.py b/createrepo/readMetadata.py
index 27d3690..54863cb 100644
--- a/createrepo/readMetadata.py
+++ b/createrepo/readMetadata.py
@@ -16,11 +16,25 @@
 # Copyright 2006 Red Hat

 import os
-import libxml2
 import stat
 from utils import errorprint, _

-from yum import repoMDObject
+import yum
+from yum import misc
+from yum.Errors import YumBaseError
+import tempfile
+class CreaterepoPkgOld(yum.sqlitesack.YumAvailablePackageSqlite):
+    # special for special people like us.
+    def _return_remote_location(self):
+
+        if self.basepath:
+            msg = """<location xml:base="%s" href="%s"/>\n""" % (
+                                     misc.to_xml(self.basepath, attrib=True),
+                                     misc.to_xml(self.relativepath, attrib=True))
+        else:
+            msg = """<location href="%s"/>\n""" % misc.to_xml(self.relativepath, attrib=True)
+
+        return msg  


 class MetadataIndex(object):
@@ -30,178 +44,72 @@ class MetadataIndex(object):
             opts = {}
         self.opts = opts
         self.outputdir = outputdir
+        realpath = os.path.realpath(outputdir)
         repodatadir = self.outputdir + '/repodata'
-        myrepomdxml = repodatadir + '/repomd.xml'
-        if os.path.exists(myrepomdxml):
-            repomd = repoMDObject.RepoMD('garbageid', myrepomdxml)
-            b = repomd.getData('primary').location[1]
-            f = repomd.getData('filelists').location[1]
-            o = repomd.getData('other').location[1]
-            basefile = os.path.join(self.outputdir, b)
-            filelistfile = os.path.join(self.outputdir, f)
-            otherfile = os.path.join(self.outputdir, o)
-        else:
-            basefile = filelistfile = otherfile = ""
-
-        self.files = {'base' : basefile,
-                      'filelist' : filelistfile,
-                      'other' : otherfile}
-        self.scan()
+        self._repo = yum.yumRepo.YumRepository('garbageid')
+        self._repo.baseurl = 'file://' + realpath
+        self._repo.basecachedir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo")
+        self._repo.base_persistdir = tempfile.mkdtemp(dir='/var/tmp', prefix="createrepo-p")
+        self._repo.metadata_expire = 1
+        self._repo.gpgcheck = 0
+        self._repo.repo_gpgcheck = 0
+        self._repo._sack = yum.sqlitesack.YumSqlitePackageSack(CreaterepoPkgOld)
+        self.pkg_tups_by_path = {}
+        try:
+            self.scan()
+        except YumBaseError, e:
+            print "Could not find valid repo at: %s" % self.outputdir
+        

     def scan(self):
-        """Read in and index old repo data"""
-        self.basenodes = {}
-        self.filesnodes = {}
-        self.othernodes = {}
-        self.pkg_ids = {}
+        """Read in old repodata"""
         if self.opts.get('verbose'):
             print _("Scanning old repo data")
-        for fn in self.files.values():
-            if not os.path.exists(fn):
-                #cannot scan
-                errorprint(_("Warning: Old repodata file missing: %s") % fn)
-                return
-        root = libxml2.parseFile(self.files['base']).getRootElement()
-        self._scanPackageNodes(root, self._handleBase)
-        if self.opts.get('verbose'):
-            print _("Indexed %i base nodes" % len(self.basenodes))
-        root = libxml2.parseFile(self.files['filelist']).getRootElement()
-        self._scanPackageNodes(root, self._handleFiles)
-        if self.opts.get('verbose'):
-            print _("Indexed %i filelist nodes" % len(self.filesnodes))
-        root = libxml2.parseFile(self.files['other']).getRootElement()
-        self._scanPackageNodes(root, self._handleOther)
-        if self.opts.get('verbose'):
-            print _("Indexed %i other nodes" % len(self.othernodes))
-        #reverse index pkg ids to track references
-        self.pkgrefs = {}
-        for relpath, pkgid in self.pkg_ids.iteritems():
-            self.pkgrefs.setdefault(pkgid,[]).append(relpath)
-
-    def _scanPackageNodes(self, root, handler):
-        node = root.children
-        while node is not None:
-            if node.type != "element":
-                node = node.next
+        self._repo.sack.populate(self._repo, 'all', None, False)
+        for thispo in self._repo.sack:
+            mtime = thispo.filetime
+            size = thispo.size
+            relpath = thispo.relativepath
+            do_stat = self.opts.get('do_stat', True)
+            if mtime is None:
+                print _("mtime missing for %s") % relpath
                 continue
-            if node.name == "package":
-                handler(node)
-            node = node.next
-
-    def _handleBase(self, node):
-        top = node
-        node = node.children
-        pkgid = None
-        mtime = None
-        size = None
-        relpath = None
-        do_stat = self.opts.get('do_stat', True)
-        while node is not None:
-            if node.type != "element":
-                node = node.next
+            if size is None:
+                print _("size missing for %s") % relpath
                 continue
-            if node.name == "checksum":
-                pkgid = node.content
-            elif node.name == "time":
-                mtime = int(node.prop('file'))
-            elif node.name == "size":
-                size = int(node.prop('package'))
-            elif node.name == "location":
-                relpath = node.prop('href')
-            node = node.next
-        if relpath is None:
-            print _("Incomplete data for node")
-            return
-        if pkgid is None:
-            print _("pkgid missing for %s") % relpath
-            return
-        if mtime is None:
-            print _("mtime missing for %s") % relpath
-            return
-        if size is None:
-            print _("size missing for %s") % relpath
-            return
-        if do_stat:
-            filepath = os.path.join(self.opts['pkgdir'], relpath)
-            try:
-                st = os.stat(filepath)
-            except OSError:
-                #file missing -- ignore
-                return
-            if not stat.S_ISREG(st.st_mode):
-                #ignore non files
-                return
-            #check size and mtime
-            if st.st_size != size:
-                if self.opts.get('verbose'):
-                    print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
-                return
-            if int(st.st_mtime) != mtime:
-                if self.opts.get('verbose'):
-                    print _("Modification time changed for %s") % filepath
-                return
-        #otherwise we index
-        self.basenodes[relpath] = top
-        self.pkg_ids[relpath] = pkgid
-
-    def _handleFiles(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.filesnodes[pkgid] = node
-
-    def _handleOther(self, node):
-        pkgid = node.prop('pkgid')
-        if pkgid:
-            self.othernodes[pkgid] = node
+            if do_stat:
+                filepath = os.path.join(self.opts['pkgdir'], relpath)
+                try:
+                    st = os.stat(filepath)
+                except OSError:
+                    #file missing -- ignore
+                    continue
+                if not stat.S_ISREG(st.st_mode):
+                    #ignore non files
+                    continue
+                #check size and mtime
+                if st.st_size != size:
+                    if self.opts.get('verbose'):
+                        print _("Size (%i -> %i) changed for file %s") % (size,st.st_size,filepath)
+                    continue
+                if int(st.st_mtime) != mtime:
+                    if self.opts.get('verbose'):
+                        print _("Modification time changed for %s") % filepath
+                    continue
+
+            self.pkg_tups_by_path[relpath] = thispo.pkgtup
+

-    def getNodes(self, relpath):
-        """Return base, filelist, and other nodes for file, if they exist
-
-        Returns a tuple of nodes, or None if not found
+    def getNodes(self, relpath):
+        """return a package object based on relative path of pkg
         """
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            return None
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        fnode = self.filesnodes.get(pkgid,None)
-        if fnode is None:
-            return None
-        onode = self.othernodes.get(pkgid,None)
-        if onode is None:
-            return None
-        return bnode, fnode, onode
-
-    def freeNodes(self,relpath):
-        #causing problems
-        """Free up nodes corresponding to file, if possible"""
-        bnode = self.basenodes.get(relpath,None)
-        if bnode is None:
-            print "Missing node for %s" % relpath
-            return
-        bnode.unlinkNode()
-        bnode.freeNode()
-        del self.basenodes[relpath]
-        pkgid = self.pkg_ids.get(relpath,None)
-        if pkgid is None:
-            print _("No pkgid found for: %s") % relpath
-            return None
-        del self.pkg_ids[relpath]
-        dups = self.pkgrefs.get(pkgid)
-        dups.remove(relpath)
-        if len(dups):
-            #still referenced
-            return
-        del self.pkgrefs[pkgid]
-        for nodes in self.filesnodes, self.othernodes:
-            node = nodes.get(pkgid)
-            if node is not None:
-                node.unlinkNode()
-                node.freeNode()
-                del nodes[pkgid]
+        if relpath in self.pkg_tups_by_path:
+            pkgtup = self.pkg_tups_by_path[relpath]
+            return self._repo.sack.searchPkgTuple(pkgtup)[0]
+        return None

+    

 if __name__ == "__main__":
     cwd = os.getcwd()
@@ -209,9 +117,9 @@ if __name__ == "__main__":
             'pkgdir': cwd}

     idx = MetadataIndex(cwd, opts)
-    for fn in idx.basenodes.keys():
-        a,b,c, = idx.getNodes(fn)
-        a.serialize()
-        b.serialize()
-        c.serialize()
-        idx.freeNodes(fn)
+    for fn in idx.pkg_tups_by_path:
+        po = idx.getNodes(fn)
+        print po.xml_dump_primary_metadata()
+        print po.xml_dump_filelists_metadata()
+        print po.xml_dump_other_metadata()
+
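
The rewritten MetadataIndex replaces hand-rolled libxml2 parsing with a throwaway YumRepository pointed at the old repodata via file://. Note that getNodes() keeps its old name for compatibility but now returns a package object rather than a node tuple. A hypothetical usage sketch mirroring the __main__ block above (the repo and rpm paths are made up):

    from createrepo.readMetadata import MetadataIndex

    opts = {'verbose': True, 'pkgdir': '/srv/repo'}
    idx = MetadataIndex('/srv/repo', opts)
    po = idx.getNodes('pkgs/foo-1.0-1.noarch.rpm')  # relative path of the rpm
    if po is not None:
        print po.xml_dump_primary_metadata()
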
diff --git a/createrepo/utils.py b/createrepo/utils.py
index 995c3b9..b0d92ec 100644
--- a/createrepo/utils.py
+++ b/createrepo/utils.py
@@ -23,6 +23,12 @@ import bz2
 import gzip
 from gzip import write32u, FNAME
 from yum import misc
+_available_compression = ['gz', 'bz2']
+try:
+    import lzma
+    _available_compression.append('xz')
+except ImportError:
+    lzma = None

 def errorprint(stuff):
     print >> sys.stderr, stuff
@@ -34,22 +40,14 @@ def _(args):

 class GzipFile(gzip.GzipFile):
     def _write_gzip_header(self):
+        # Generate a header that is easily reproduced with gzip -9 -n on
+        # a Unix-like system
1193          self.fileobj.write('\037\213')             # magic header
1194          self.fileobj.write('\010')                 # compression method
1195 -        if hasattr(self, 'name'):
1196 -            fname = self.name[:-3]
1197 -        else:
1198 -            fname = self.filename[:-3]
1199 -        flags = 0
1200 -        if fname:
1201 -            flags = FNAME
1202 -        self.fileobj.write(chr(flags))
1203 -        write32u(self.fileobj, long(0))
1204 -        self.fileobj.write('\002')
1205 -        self.fileobj.write('\377')
1206 -        if fname:
1207 -            self.fileobj.write(fname + '\000')
1208 -
1209 +        self.fileobj.write('\000')                 # flags
1210 +        write32u(self.fileobj, long(0))            # timestamp
1211 +        self.fileobj.write('\002')                 # max compression
1212 +        self.fileobj.write('\003')                 # UNIX
1213  
1214  def _gzipOpen(filename, mode="rb", compresslevel=9):
1215      return GzipFile(filename, mode, compresslevel)
1216 @@ -69,6 +67,75 @@ def bzipFile(source, dest):
1217      s_fn.close()
1218  
1219  
1220 +def xzFile(source, dest):
1221 +    if not 'xz' in _available_compression:
1222 +        raise MDError, "Cannot use xz for compression, library/module is not available"
1223 +        
1224 +    s_fn = open(source, 'rb')
1225 +    destination = lzma.LZMAFile(dest, 'w')
1226 +
1227 +    while True:
1228 +        data = s_fn.read(1024000)
1229 +
1230 +        if not data: break
1231 +        destination.write(data)
1232 +
1233 +    destination.close()
1234 +    s_fn.close()
1235 +
1236 +def gzFile(source, dest):
1237 +        
1238 +    s_fn = open(source, 'rb')
1239 +    destination = GzipFile(dest, 'w')
1240 +
1241 +    while True:
1242 +        data = s_fn.read(1024000)
1243 +
1244 +        if not data: break
1245 +        destination.write(data)
1246 +
1247 +    destination.close()
1248 +    s_fn.close()
1249 +
1250 +
1251 +class Duck:
1252 +    def __init__(self, **attr):
1253 +        self.__dict__ = attr
1254 +
1255 +
1256 +def compressFile(source, dest, compress_type):
1257 +    """Compress the file at source into dest using the given compression type"""
1258 +    
1259 +    if compress_type == 'xz':
1260 +        xzFile(source, dest)
1261 +    elif compress_type == 'bz2':
1262 +        bzipFile(source, dest)
1263 +    elif compress_type == 'gz':
1264 +        gzFile(source, dest)
1265 +    else:
1266 +        raise MDError, "Unknown compression type %s" % compress_type
1267 +    
1268 +def compressOpen(fn, mode='rb', compress_type=None):
1269 +    
1270 +    if not compress_type:
1271 +        # read-only open without an explicit compress_type - guess from the file extension
1272 +        compress_type = fn.split('.')[-1]
1273 +        if compress_type not in _available_compression:
1274 +            compress_type = 'gz'
1275 +            
1276 +    if compress_type == 'xz':
1277 +        fh = lzma.LZMAFile(fn, mode)
1278 +        if mode == 'w':
1279 +            fh = Duck(write=lambda s, write=fh.write: s != '' and write(s),
1280 +                      close=fh.close)
1281 +        return fh
1282 +    elif compress_type == 'bz2':
1283 +        return bz2.BZ2File(fn, mode)
1284 +    elif compress_type == 'gz':
1285 +        return _gzipOpen(fn, mode)
1286 +    else:
1287 +        raise MDError, "Unknown compression type %s" % compress_type
1288 +    
1289  def returnFD(filename):
1290      try:
1291          fdno = os.open(filename, os.O_RDONLY)
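A hypothetical round trip through the two new helpers (file names are placeholders; the 'xz' branch needs the lzma module detected at import time above):

    from createrepo.utils import compressFile, compressOpen

    compressFile('primary.xml', 'primary.xml.xz', 'xz')
    fh = compressOpen('primary.xml.xz')   # no compress_type given: guessed from extension
    data = fh.read()
    fh.close()
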
1292 @@ -124,15 +191,28 @@ def encodefiletypelist(filetypelist):
1293      return result
1294  
1295  def split_list_into_equal_chunks(seq, num_chunks):
1296 -    avg = len(seq) / float(num_chunks)
1297 -    out = []
1298 -    last = 0.0
1299 -    while last < len(seq):
1300 -        out.append(seq[int(last):int(last + avg)])
1301 -        last += avg
1302 -
1303 +    """Round-robin items into num_chunks lists; used on sorted input which is then merged in order"""
1304 +    out = [[] for i in range(num_chunks)]
1305 +    for i, item in enumerate(seq):
1306 +        out[i % num_chunks].append(item)
1307      return out
1308  
1309 +def num_cpus_online(unknown=1):
1310 +    if not hasattr(os, "sysconf"):
1311 +        return unknown
1312 +
1313 +    if not os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
1314 +        return unknown
1315 +
1316 +    ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
1317 +    try:
1318 +        if int(ncpus) > 0:
1319 +            return ncpus
1320 +    except:
1321 +        pass
1322 +
1323 +    return unknown
1324 +
1325  
1326  class MDError(Exception):
1327      def __init__(self, value=None):
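The round-robin split matters because the input is sorted and the per-worker outputs are read back one package at a time from each worker in turn: handing item i to chunk i % num_chunks makes that interleaved read reproduce the original order, and chunk lengths differ by at most one (the old float-stepped slicing could emit empty chunks when num_chunks exceeded len(seq)). A doctest-style illustration, not part of the patch:

    >>> split_list_into_equal_chunks(range(7), 3)
    [[0, 3, 6], [1, 4], [2, 5]]
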
1328 diff --git a/createrepo/yumbased.py b/createrepo/yumbased.py
1329 index ac06196..f87ac6d 100644
1330 --- a/createrepo/yumbased.py
1331 +++ b/createrepo/yumbased.py
1332 @@ -16,6 +16,11 @@
1333  
1334  
1335  import os
1336 +def _get_umask():
1337 +   oumask = os.umask(0)
1338 +   os.umask(oumask)
1339 +   return oumask
1340 +_b4rpm_oumask = _get_umask()
1341  import rpm
1342  import types
1343  
1344 @@ -86,6 +91,9 @@ class CreateRepoPackage(YumLocalPackage):
1345                  csumo = os.fdopen(csumo, 'w', -1)
1346                  csumo.write(checksum)
1347                  csumo.close()
1348 +                #  tempfile forces 002 ... we want to undo that, so that users
1349 +                # can share the cache. BZ 833350.
1350 +                os.chmod(tmpfilename, 0666 ^ _b4rpm_oumask)
1351                  os.rename(tmpfilename, csumfile)
1352              except:
1353                  pass
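os.umask() can only be read by writing it, hence the set-and-restore dance done once at import time; the cached mask is then used to widen the 0600 mode the tempfile gets. A minimal sketch of the idea - note that for the usual umasks whose bits fall inside 0666 (e.g. 002, 022), the XOR used above and a plain AND-NOT agree:

    import os

    def get_umask():
        oumask = os.umask(0)   # os.umask() returns the previous mask...
        os.umask(oumask)       # ...so set a throwaway value, then restore it
        return oumask

    for um in (0002, 0022):
        assert 0666 ^ um == 0666 & ~um
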
1354 diff --git a/docs/createrepo.8 b/docs/createrepo.8
1355 index e3c4c3b..ff359de 100644
1356 --- a/docs/createrepo.8
1357 +++ b/docs/createrepo.8
1358 @@ -53,7 +53,8 @@ gullible).
1359  Don't generate repo metadata, if their timestamps are newer than its rpms.
1360  This option decreases the processing time drastically again, if you happen
1361  to run it on an unmodified repo, but it is (currently) mutual exclusive
1362 -with the --split option.
1363 +with the --split option. NOTE: This option will not notice when
1364 +packages have been removed from the repo. Use --update to handle that.
1365  .br
1366  .IP "\fB\--split\fP"
1367  Run in split media mode. Rather than pass a single directory, take a set of
1368 @@ -104,7 +105,16 @@ Tells createrepo to generate deltarpms and the delta metadata
1369  paths to look for older pkgs to delta against. Can be specified multiple times
1370  .IP "\fB\--num-deltas\fP int"
1371  the number of older versions to make deltas against. Defaults to 1
1372 -
1373 +.IP "\fB\--read-pkgs-list\fP READ_PKGS_LIST"
1374 +output the paths to the pkgs actually read; useful with --update
1375 +.IP "\fB\--max-delta-rpm-size\fP MAX_DELTA_RPM_SIZE"
1376 +max size of an rpm to run deltarpm against (in bytes)
1377 +.IP "\fB\--workers\fP WORKERS"
1378 +number of workers to spawn to read rpms
1379 +.IP "\fB\--compress-type\fP"
1380 +specify which compression method to use: compat (default),
1381 +xz (may not be available), gz, bz2.
1382 +.IP
1383  
1384  .SH "EXAMPLES"
1385  Here is an example of a repository with a groups file. Note that the
1386 diff --git a/genpkgmetadata.py b/genpkgmetadata.py
1387 index 8c98191..c46e441 100755
1388 --- a/genpkgmetadata.py
1389 +++ b/genpkgmetadata.py
1390 @@ -37,6 +37,12 @@ def parse_args(args, conf):
1391         Sanity check all the things being passed in.
1392      """
1393  
1394 +    def_workers = os.nice(0)
1395 +    if def_workers > 0:
1396 +        def_workers = 1 # We are niced, so just use a single worker.
1397 +    else:
1398 +        def_workers = 0 # 0 = autodetect (zoooom....)
1399 +
1400      _def   = yum.misc._default_checksums[0]
1401      _avail = yum.misc._available_checksums
1402      parser = OptionParser(version = "createrepo %s" % createrepo.__version__)
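os.nice(increment) returns the resulting niceness, so os.nice(0) reads the current value without changing it: a createrepo run started under nice(1) then defaults to a single worker instead of saturating the machine. Roughly:

    import os

    if os.nice(0) > 0:
        workers = 1   # we were niced: stay out of everyone's way
    else:
        workers = 0   # 0 appears to mean autodetect (cf. num_cpus_online above)
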
1403 @@ -100,6 +106,8 @@ def parse_args(args, conf):
1404      parser.add_option("--simple-md-filenames", dest="simple_md_filenames",
1405          help="do not include the file's checksum in the filename, helps with proxies",
1406          default=False, action="store_true")
1407 +    parser.add_option("--retain-old-md", default=0, type='int', dest='retain_old_md',
1408 +        help="keep around the latest (by timestamp) N copies of the old repodata")
1409      parser.add_option("--distro", default=[], action="append",
1410          help="distro tag and optional cpeid: --distro" "'cpeid,textname'")
1411      parser.add_option("--content", default=[], dest='content_tags',
1412 @@ -119,10 +127,15 @@ def parse_args(args, conf):
1413      parser.add_option("--max-delta-rpm-size", default=100000000,
1414          dest='max_delta_rpm_size', type='int',
1415          help="max size of an rpm that to run deltarpm against (in bytes)")
1416 -
1417 -    parser.add_option("--workers", default=1,
1418 +    parser.add_option("--workers", default=def_workers,
1419          dest='workers', type='int',
1420          help="number of workers to spawn to read rpms")
1421 +    parser.add_option("--xz", default=False,
1422 +        action="store_true",
1423 +        help="use xz for repodata compression")
1424 +    parser.add_option("--compress-type", default='compat', dest="compress_type",
1425 +        help="which compression type to use")
1426 +        
1427      
1428      (opts, argsleft) = parser.parse_args(args)
1429      if len(argsleft) > 1 and not opts.split:
1430 @@ -138,6 +151,9 @@ def parse_args(args, conf):
1431      else:
1432          directories = argsleft
1433  
1434 +    if opts.workers >= 128:
1435 +        errorprint(_('Warning: More than 128 workers is a lot. Limiting.'))
1436 +        opts.workers = 128
1437      if opts.sumtype == 'sha1':
1438          errorprint(_('Warning: It is more compatible to use sha instead of sha1'))
1439  
1440 @@ -155,6 +171,11 @@ def parse_args(args, conf):
1441      
1442      if opts.nodatabase:
1443          opts.database = False
1444 +    
1445 +    # xz is just a shorthand for compress_type
1446 +    if opts.xz and opts.compress_type == 'compat':
1447 +        opts.compress_type='xz'
1448 +        
1449          
1450      # let's switch over to using the conf object - put all the opts into it
1451      for opt in parser.option_list:
1452 @@ -240,6 +261,7 @@ def main(args):
1453              if mdgen.checkTimeStamps():
1454                  if mdgen.conf.verbose:
1455                      print _('repo is up to date')
1456 +                mdgen._cleanup_tmp_repodata_dir()
1457                  sys.exit(0)
1458  
1459          if conf.profile:
1460 diff --git a/mergerepo.py b/mergerepo.py
1461 index 05e5f5e..80cb1a8 100755
1462 --- a/mergerepo.py
1463 +++ b/mergerepo.py
1464 @@ -18,6 +18,7 @@
1465  
1466  import sys
1467  import createrepo.merge
1468 +from createrepo.utils import MDError
1469  from optparse import OptionParser
1470  
1471  #TODO:
1472 @@ -47,6 +48,9 @@ def parse_args(args):
1473                        help="Do not merge group(comps) metadata")
1474      parser.add_option("", "--noupdateinfo", default=False, action="store_true",
1475                        help="Do not merge updateinfo metadata")
1476 +    parser.add_option("--compress-type", default=None, dest="compress_type",
1477 +                      help="which compression type to use")
1478 +                      
1479      (opts, argsleft) = parser.parse_args(args)
1480  
1481      if len(opts.repos) < 2:
1482 @@ -77,9 +81,14 @@ def main(args):
1483          rmbase.groups = False
1484      if opts.noupdateinfo:
1485          rmbase.updateinfo = False
1486 -
1487 -    rmbase.merge_repos()
1488 -    rmbase.write_metadata()
1489 -
1490 +    if opts.compress_type:
1491 +        rmbase.mdconf.compress_type = opts.compress_type
1492 +    try:
1493 +        rmbase.merge_repos()
1494 +        rmbase.write_metadata()
1495 +    except MDError, e:
1496 +        print >> sys.stderr, "Could not merge repos: %s" % e
1497 +        sys.exit(1)
1498 +        
1499  if __name__ == "__main__":
1500      main(sys.argv[1:])
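The same flow is available programmatically; a rough Python equivalent of what mergerepo.py now does (repo paths are placeholders, and the RepoMergeBase constructor arguments are assumed from its use here):

    import sys
    import createrepo.merge
    from createrepo.utils import MDError

    rmbase = createrepo.merge.RepoMergeBase(['repo1/', 'repo2/'])
    rmbase.mdconf.compress_type = 'xz'   # the same knob --compress-type sets
    try:
        rmbase.merge_repos()
        rmbase.write_metadata()
    except MDError, e:
        print >> sys.stderr, "Could not merge repos: %s" % e
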
1501 diff --git a/modifyrepo.py b/modifyrepo.py
1502 index 17094a4..bf1eec0 100755
1503 --- a/modifyrepo.py
1504 +++ b/modifyrepo.py
1505 @@ -1,11 +1,15 @@
1506  #!/usr/bin/python
1507 -# This tools is used to insert arbitrary metadata into an RPM repository.
1508 +# This tool is used to manipulate arbitrary metadata in an RPM repository.
1509  # Example:
1510  #           ./modifyrepo.py updateinfo.xml myrepo/repodata
1511 +#           or
1512 +#           ./modifyrepo.py --remove updateinfo.xml myrepo/repodata
1513  # or in Python:
1514  #           >>> from modifyrepo import RepoMetadata
1515  #           >>> repomd = RepoMetadata('myrepo/repodata')
1516  #           >>> repomd.add('updateinfo.xml')
1517 +#           or
1518 +#           >>> repomd.remove('updateinfo.xml')
1519  #
1520  # This program is free software; you can redistribute it and/or modify
1521  # it under the terms of the GNU General Public License as published by
1522 @@ -20,11 +24,13 @@
1523  # (C) Copyright 2006  Red Hat, Inc.
1524  # Luke Macken <lmacken@redhat.com>
1525  # modified by Seth Vidal 2008
1526 +# modified by Daniel Mach 2011
1527  
1528  import os
1529  import sys
1530  from createrepo import __version__
1531 -from createrepo.utils import checksum_and_rename, GzipFile, MDError
1532 +from createrepo.utils import checksum_and_rename, compressOpen, MDError
1533 +from createrepo.utils import _available_compression
1534  from yum.misc import checksum
1535  
1536  from yum.repoMDObject import RepoMD, RepoMDError, RepoData
1537 @@ -39,6 +45,8 @@ class RepoMetadata:
1538          self.repodir = os.path.abspath(repo)
1539          self.repomdxml = os.path.join(self.repodir, 'repomd.xml')
1540          self.checksum_type = 'sha256'
1541 +        self.compress = False
1542 +        self.compress_type = _available_compression[-1] # best available
1543  
1544          if not os.path.exists(self.repomdxml):
1545              raise MDError, '%s not found' % self.repomdxml
1546 @@ -49,6 +57,35 @@ class RepoMetadata:
1547          except RepoMDError, e:
1548              raise MDError, 'Could not parse %s' % self.repomdxml
1549  
1550 +    def _get_mdtype(self, mdname, mdtype=None):
1551 +        """ Get the mdtype from an explicit mdtype or derive it from mdname. """
1552 +        if mdtype:
1553 +            return mdtype
1554 +        return mdname.split('.')[0]
1555 +
1556 +    def _print_repodata(self, repodata):
1557 +        """ Print repodata details. """
1558 +        print "           type =", repodata.type
1559 +        print "       location =", repodata.location[1]
1560 +        print "       checksum =", repodata.checksum[1]
1561 +        print "      timestamp =", repodata.timestamp
1562 +        print "  open-checksum =", repodata.openchecksum[1]
1563 +
1564 +    def _write_repomd(self):
1565 +        """ Write the updated repomd.xml. """
1566 +        outmd = file(self.repomdxml, 'w')
1567 +        outmd.write(self.repoobj.dump_xml())
1568 +        outmd.close()
1569 +        print "Wrote:", self.repomdxml
1570 +
1571 +    def _remove_repodata_file(self, repodata):
1572 +        """ Remove the file referenced by a repodata location. """
1573 +        try:
1574 +            os.remove(repodata.location[1])
1575 +        except OSError, ex:
1576 +            # a missing file is fine; anything else is fatal
1577 +            if ex.errno != 2:
1578 +                raise MDError("could not remove file %s" % repodata.location[1])
1579  
1580      def add(self, metadata, mdtype=None):
1581          """ Insert arbitrary metadata into this repository.
1582 @@ -63,8 +100,8 @@ class RepoMetadata:
1583              mdname = 'updateinfo.xml'
1584          elif isinstance(metadata, str):
1585              if os.path.exists(metadata):
1586 -                if metadata.endswith('.gz'):
1587 -                    oldmd = GzipFile(filename=metadata, mode='rb')
1588 +                if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'):
1589 +                    oldmd = compressOpen(metadata, mode='rb')
1590                  else:
1591                      oldmd = file(metadata, 'r')
1592                  md = oldmd.read()
1593 @@ -75,14 +112,19 @@ class RepoMetadata:
1594          else:
1595              raise MDError, 'invalid metadata type'
1596  
1597 +        do_compress = False
1598          ## Compress the metadata and move it into the repodata
1599 -        if not mdname.endswith('.gz'):
1600 -            mdname += '.gz'
1601 -        if not mdtype:
1602 -            mdtype = mdname.split('.')[0]
1603 -            
1604 +        if self.compress or not mdname.split('.')[-1] in ('gz', 'bz2', 'xz'):
1605 +            do_compress = True
1606 +            mdname += '.' + self.compress_type
1607 +        mdtype = self._get_mdtype(mdname, mdtype)
1608 +
1609          destmd = os.path.join(self.repodir, mdname)
1610 -        newmd = GzipFile(filename=destmd, mode='wb')
1611 +        if do_compress:
1612 +            newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type)
1613 +        else:
1614 +            newmd = open(destmd, 'wb')
1615 +            
1616          newmd.write(md)
1617          newmd.close()
1618          print "Wrote:", destmd
1619 @@ -91,11 +133,8 @@ class RepoMetadata:
1620          csum, destmd = checksum_and_rename(destmd, self.checksum_type)
1621          base_destmd = os.path.basename(destmd)
1622  
1623 -
1624 -        ## Remove any stale metadata
1625 -        if mdtype in self.repoobj.repoData:
1626 -            del self.repoobj.repoData[mdtype]
1627 -            
1628 +        # Remove any stale metadata
1629 +        old_rd = self.repoobj.repoData.pop(mdtype, None)
1630  
1631          new_rd = RepoData()
1632          new_rd.type = mdtype
1633 @@ -105,18 +144,28 @@ class RepoMetadata:
1634          new_rd.size = str(os.stat(destmd).st_size)
1635          new_rd.timestamp = str(os.stat(destmd).st_mtime)
1636          self.repoobj.repoData[new_rd.type] = new_rd
1637 -        
1638 -        print "           type =", new_rd.type
1639 -        print "       location =", new_rd.location[1]
1640 -        print "       checksum =", new_rd.checksum[1]
1641 -        print "      timestamp =", new_rd.timestamp
1642 -        print "  open-checksum =", new_rd.openchecksum[1]
1643 -
1644 -        ## Write the updated repomd.xml
1645 -        outmd = file(self.repomdxml, 'w')
1646 -        outmd.write(self.repoobj.dump_xml())
1647 -        outmd.close()
1648 -        print "Wrote:", self.repomdxml
1649 +        self._print_repodata(new_rd)
1650 +        self._write_repomd()
1651 +
1652 +        if old_rd is not None and old_rd.location[1] != new_rd.location[1]:
1653 +            # remove the old file when overwriting metadata
1654 +            # with the same mdtype but different location
1655 +            self._remove_repodata_file(old_rd)
1656 +
1657 +    def remove(self, metadata, mdtype=None):
1658 +        """ Remove metadata from this repository. """
1659 +        mdname = metadata
1660 +        mdtype = self._get_mdtype(mdname, mdtype)
1661 +
1662 +        old_rd = self.repoobj.repoData.pop(mdtype, None)
1663 +        if old_rd is None:
1664 +            print "Metadata not found: %s" % mdtype
1665 +            return
1666 +
1667 +        self._remove_repodata_file(old_rd)
1668 +        print "Removed:"
1669 +        self._print_repodata(old_rd)
1670 +        self._write_repomd()
1671  
1672  
1673  def main(args):
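Together with add(), the new remove() gives modifyrepo a symmetric Python API; a hypothetical session (paths are placeholders):

    >>> from modifyrepo import RepoMetadata
    >>> repomd = RepoMetadata('myrepo/repodata')
    >>> repomd.compress_type = 'xz'
    >>> repomd.add('updateinfo.xml')      # writes updateinfo.xml.xz, updates repomd.xml
    >>> repomd.remove('updateinfo.xml')   # drops the 'updateinfo' entry and its file
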
1674 @@ -124,7 +173,13 @@ def main(args):
1675      # query options
1676      parser.add_option("--mdtype", dest='mdtype',
1677                        help="specific datatype of the metadata, will be derived from the filename if not specified")
1678 -    parser.usage = "modifyrepo [options] <input_metadata> <output repodata>"
1679 +    parser.add_option("--remove", action="store_true",
1680 +                      help="remove specified file from repodata")
1681 +    parser.add_option("--compress", action="store_true", default=False,
1682 +                      help="compress the new repodata before adding it to the repo")
1683 +    parser.add_option("--compress-type", dest='compress_type', default='gz',
1684 +                      help="compression format to use")
1685 +    parser.usage = "modifyrepo [options] [--remove] <input_metadata> <output repodata>"
1686      
1687      (opts, argsleft) = parser.parse_args(args)
1688      if len(argsleft) != 2:
1689 @@ -137,11 +192,28 @@ def main(args):
1690      except MDError, e:
1691          print "Could not access repository: %s" % str(e)
1692          return 1
1693 +
1694 +
1695 +    repomd.compress = opts.compress
1696 +    if opts.compress_type in _available_compression:
1697 +        repomd.compress_type = opts.compress_type
1698 +
1699 +    # remove
1700 +    if opts.remove:
1701 +        try:
1702 +            repomd.remove(metadata)
1703 +        except MDError, ex:
1704 +            print "Could not remove metadata %s: %s" % (metadata, str(ex))
1705 +            return 1
1706 +        return
1707 +
1708 +    # add
1709      try:
1710          repomd.add(metadata, mdtype=opts.mdtype)
1711      except MDError, e:
1712          print "Could not add metadata from file %s: %s" % (metadata, str(e))
1713          return 1
1714 +    
1715  
1716  if __name__ == '__main__':
1717      ret = main(sys.argv[1:])
1718 diff --git a/worker.py b/worker.py
1719 index eb35ef7..fe6758f 100755
1720 --- a/worker.py
1721 +++ b/worker.py
1722 @@ -5,6 +5,7 @@ import yum
1723  import createrepo
1724  import os
1725  import rpmUtils
1726 +import re
1727  from optparse import OptionParser
1728  
1729  
1730 @@ -23,6 +24,8 @@ def main(args):
1731      parser = OptionParser()
1732      parser.add_option('--tmpmdpath', default=None, 
1733                  help="path where the outputs should be dumped for this worker")
1734 +    parser.add_option('--pkglist', default=None, 
1735 +                help="file to read the pkglist from, in lieu of listing them all on the cli")
1736      parser.add_option("--pkgoptions", default=[], action='append',
1737                  help="pkgoptions in the format of key=value")
1738      parser.add_option("--quiet", default=False, action='store_true',
1739 @@ -36,10 +39,6 @@ def main(args):
1740      opts, pkgs = parser.parse_args(args)
1741      external_data = {'_packagenumber': 1}
1742      globalopts = {}
1743 -    if not opts.tmpmdpath:
1744 -        print >> sys.stderr, "tmpmdpath required for destination files"
1745 -        sys.exit(1)
1746 -    
1747      
1748      for strs in opts.pkgoptions:
1749          k,v = strs.split('=')
1750 @@ -64,15 +63,34 @@ def main(args):
1751      
1752      reldir = external_data['_reldir']
1753      ts = rpmUtils.transaction.initReadOnlyTransaction()
1754 -    pri = open(opts.tmpmdpath + '/primary.xml' , 'w')
1755 -    fl = open(opts.tmpmdpath  + '/filelists.xml' , 'w')
1756 -    other = open(opts.tmpmdpath  + '/other.xml' , 'w')
1757 -    
1758 -    
1759 +    if opts.tmpmdpath:
1760 +        files = [open(opts.tmpmdpath + '/%s.xml' % i, 'w')
1761 +                 for i in ('primary', 'filelists', 'other')]
1762 +        def output(*xml):
1763 +            for fh, buf in zip(files, xml):
1764 +                fh.write(buf)
1765 +    else:
1766 +        def output(*xml):
1767 +            buf = ' '.join(str(len(i)) for i in xml)
1768 +            sys.stdout.write('*** %s\n' % buf)
1769 +            for buf in xml:
1770 +                sys.stdout.write(buf)
1771 +
1772 +    if opts.pkglist:
1773 +        for line in open(opts.pkglist,'r').readlines():
1774 +            line = line.strip()
1775 +            if re.match('^\s*\#.*', line) or re.match('^\s*$', line):
1776 +                continue
1777 +            pkgs.append(line)
1778 +
1779 +    clog_limit = globalopts.get('clog_limit', None)
1780 +    if clog_limit is not None:
1781 +        clog_limit = int(clog_limit)
1782      for pkgfile in pkgs:
1783          pkgpath = reldir + '/' + pkgfile
1784          if not os.path.exists(pkgpath):
1785              print >> sys.stderr, "File not found: %s" % pkgpath
1786 +            output()
1787              continue
1788  
1789          try:
1790 @@ -80,20 +98,17 @@ def main(args):
1791                  print "reading %s" % (pkgfile)
1792  
1793              pkg = createrepo.yumbased.CreateRepoPackage(ts, package=pkgpath, 
1794 -                                                        external_data=external_data)
1795 -            pri.write(pkg.xml_dump_primary_metadata())
1796 -            fl.write(pkg.xml_dump_filelists_metadata())
1797 -            other.write(pkg.xml_dump_other_metadata(clog_limit=
1798 -                                            globalopts.get('clog_limit', None)))
1799 +                                sumtype=globalopts.get('sumtype', None), 
1800 +                                external_data=external_data)
1801 +            output(pkg.xml_dump_primary_metadata(),
1802 +                   pkg.xml_dump_filelists_metadata(),
1803 +                   pkg.xml_dump_other_metadata(clog_limit=clog_limit))
1804          except yum.Errors.YumBaseError, e:
1805              print >> sys.stderr, "Error: %s" % e
1806 +            output()
1807              continue
1808          else:
1809              external_data['_packagenumber']+=1
1810          
1811 -    pri.close()
1812 -    fl.close()
1813 -    other.close()
1814 -    
1815  if __name__ == "__main__":
1816      main(sys.argv[1:])
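When --tmpmdpath is omitted, the worker now streams all three documents over stdout, each record preceded by a '*** <len> <len> <len>' size line (a bare '***' line marks a package that could not be read). A hypothetical reader on the parent side - the real consumer lives in createrepo itself, and the worker arguments here are only illustrative:

    import subprocess

    proc = subprocess.Popen(['./worker.py', '--quiet',
                             '--pkgoptions', 'reldir=.', 'foo.rpm'],
                            stdout=subprocess.PIPE)
    line = proc.stdout.readline()
    while line:
        if line.startswith('***'):
            sizes = [int(n) for n in line.split()[1:]]
            # primary, filelists, other xml - in that order, sizes[i] bytes each
            bufs = [proc.stdout.read(n) for n in sizes]
        line = proc.stdout.readline()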