1 commit 697db3c82877c46c4567e744a8bc15f1748b94f7
2 Author: Arkadiusz Miśkiewicz <arekm@maven.pl>
3 Date: Fri Nov 28 12:47:40 2014 +0100
5 gitrepo.check_remote() is expensive, so make it run in parallel.
7 gitrepo.check_remote() now runs in parallel (as part of
8 fetch_package()). Unfortunately, lambdas cannot be pickled,
9 so we have to get rid of them.
11 diff --git a/git_slug/refsdata.py b/git_slug/refsdata.py
12 index 4354ac4..67592f8 100644
13 --- a/git_slug/refsdata.py
14 +++ b/git_slug/refsdata.py
15 @@ -16,7 +16,7 @@ class NoMatchedRepos(Exception):
18 def __init__(self, stream, pattern, dirpattern=('*',)):
19 - self.heads = collections.defaultdict(lambda: collections.defaultdict(lambda: EMPTYSHA1))
20 + self.heads = collections.defaultdict(self.__dict_var__)
21 pats = re.compile('|'.join(fnmatch.translate(os.path.join('refs/heads', p)) for p in pattern))
22 dirpat = re.compile('|'.join(fnmatch.translate(p) for p in dirpattern))
23 for line in stream.readlines():
24 @@ -28,6 +28,12 @@ class RemoteRefsData:
28 + def __dict_init__(self):
31 + def __dict_var__(self):
32 + return collections.defaultdict(self.__dict_init__)
34 def put(self, repo, data):
36 (sha1_old, sha1, ref) = line.split()
37 diff --git a/slug.py b/slug.py
38 index b576df8..68f68cd 100755
41 @@ -96,7 +96,14 @@ def getrefs(*args):
45 -def fetch_package(gitrepo, ref2fetch, options):
46 +def fetch_package(gitrepo, refs_heads, options):
48 + for ref in refs_heads:
49 + if gitrepo.check_remote(ref) != refs_heads[ref]:
50 + ref2fetch.append('+{}:{}/{}'.format(ref, REMOTEREFS, ref[len('refs/heads/'):]))
52 + ref2fetch.append('refs/notes/*:refs/notes/*')
55 (stdout, stderr) = gitrepo.fetch(ref2fetch, options.depth)
57 @@ -130,13 +137,7 @@ def fetch_packages(options, return_all=False):
60 gitrepo = GitRepo(os.path.join(options.packagesdir, pkgdir))
62 - for ref in refs.heads[pkgdir]:
63 - if gitrepo.check_remote(ref) != refs.heads[pkgdir][ref]:
64 - ref2fetch.append('+{}:{}/{}'.format(ref, REMOTEREFS, ref[len('refs/heads/'):]))
66 - ref2fetch.append('refs/notes/*:refs/notes/*')
67 - args.append((gitrepo, ref2fetch, options))
68 + args.append((gitrepo, refs.heads[pkgdir], options))
71 pool = WorkerPool(options.jobs, pool_worker_init)
73 commit 3aa5fead45cce8c63eef64b98ce5dd215cd7dc24
74 Author: Arkadiusz Miśkiewicz <arekm@maven.pl>
75 Date: Fri Nov 28 12:15:20 2014 +0100
77 Initialize updated_repos to empty list.
79 diff --git a/slug.py b/slug.py
80 index 914e894..b576df8 100755
83 @@ -138,6 +138,7 @@ def fetch_packages(options, return_all=False):
84 ref2fetch.append('refs/notes/*:refs/notes/*')
85 args.append((gitrepo, ref2fetch, options))
88 pool = WorkerPool(options.jobs, pool_worker_init)
90 updated_repos = pool.starmap(fetch_package, args)
92 commit 3482f3141eb1ecc9cc44d7b6d5af359960a49e73
93 Author: Arkadiusz Miśkiewicz <arekm@maven.pl>
94 Date: Fri Nov 28 12:13:27 2014 +0100
96 Parallelize initpackage operation.
98 diff --git a/slug.py b/slug.py
99 index fa8fd89..914e894 100755
102 @@ -108,15 +108,25 @@ def fetch_package(gitrepo, ref2fetch, options):
103 def fetch_packages(options, return_all=False):
104 refs = getrefs(options.branch, options.repopattern)
105 print('Read remotes data')
107 + if options.newpkgs:
108 + for pkgdir in sorted(refs.heads):
109 + gitdir = os.path.join(options.packagesdir, pkgdir, '.git')
110 + if not os.path.isdir(gitdir):
111 + pkgs_new.append(pkgdir)
113 + pool = WorkerPool(options.jobs, pool_worker_init)
115 + pool.starmap(initpackage, zip(pkgs_new, [options] * len(pkgs_new)))
116 + except KeyboardInterrupt:
123 for pkgdir in sorted(refs.heads):
124 - gitdir = os.path.join(options.packagesdir, pkgdir, '.git')
125 - if not os.path.isdir(gitdir):
126 - if options.newpkgs:
127 - gitrepo = initpackage(pkgdir, options)
130 - elif options.omitexisting:
131 + if options.omitexisting and pkgdir not in pkgs_new:
134 gitrepo = GitRepo(os.path.join(options.packagesdir, pkgdir))
136 commit 29ab16f193cf3ebccb0c044b98f2ba9be98c3090
137 Author: Arkadiusz Miśkiewicz <arekm@maven.pl>
138 Date: Sun Nov 23 00:32:46 2014 +0100
140 Make updateall off by default.
142 Turn updateall off by default so 'git pld pull' will only check out
143 packages that were fetched in this session. If you want the old behaviour,
144 use 'git pld pull --all'.
146 diff --git a/slug.py b/slug.py
147 index d083cf4..fa8fd89 100755
150 @@ -274,7 +274,7 @@ default_options['fetch'] = {'branch': '[*]', 'prune': False, 'newpkgs': False, '
152 pull = subparsers.add_parser('pull', help='git-pull in all existing repositories', parents=[common_fetchoptions],
153 formatter_class=argparse.RawDescriptionHelpFormatter)
154 -pull.add_argument('--all', help='update local branches in all repositories', dest='updateall', action='store_true', default=True)
155 +pull.add_argument('--all', help='update local branches in all repositories', dest='updateall', action='store_true', default=False)
156 pull.add_argument('--noall', help='update local branches only when something has been fetched', dest='updateall', action='store_false', default=True)
157 newpkgsopt = pull.add_mutually_exclusive_group()
158 newpkgsopt.add_argument('-n', '--newpkgs', help='download packages that do not exist on local side',
160 commit da9abb0e6d7ef1a1440e7f5ac4ad4dbf5538dc99
161 Author: Arkadiusz Miśkiewicz <arekm@maven.pl>
162 Date: Sun Nov 23 00:28:22 2014 +0100
164 Add --newpkgs/--nonewpkgs to pull command.
166 Allow 'git pld pull --newpkgs' to also fetch and pull new packages. Off
169 diff --git a/slug.py b/slug.py
170 index da9c050..d083cf4 100755
173 @@ -276,8 +276,12 @@ pull = subparsers.add_parser('pull', help='git-pull in all existing repositories
174 formatter_class=argparse.RawDescriptionHelpFormatter)
175 pull.add_argument('--all', help='update local branches in all repositories', dest='updateall', action='store_true', default=True)
176 pull.add_argument('--noall', help='update local branches only when something has been fetched', dest='updateall', action='store_false', default=True)
177 +newpkgsopt = pull.add_mutually_exclusive_group()
178 +newpkgsopt.add_argument('-n', '--newpkgs', help='download packages that do not exist on local side',
179 + action='store_true')
180 +newpkgsopt.add_argument('-nn', '--nonewpkgs', help='do not download new packages', dest='newpkgs', action='store_false')
181 pull.set_defaults(func=pull_packages, branch='[*]', prune=False, newpkgs=False, omitexisting=False)
182 -default_options['pull'] = {'branch': ['*'], 'prune': False, 'newpkgs': False, 'omitexisting': False}
183 +default_options['pull'] = {'branch': ['*'], 'prune': False, 'omitexisting': False}
185 checkout =subparsers.add_parser('checkout', help='checkout repositories', parents=[common_fetchoptions],
186 formatter_class=argparse.RawDescriptionHelpFormatter)
188 commit b1096c634ea9b262bd791863d68e2aed3847078d
189 Author: Arkadiusz Miśkiewicz <arekm@maven.pl>
190 Date: Sun Nov 23 00:18:38 2014 +0100
192 Parallelize fetching, checking out, cloning.
194 Parallelize fetching, checking out, cloning using multiprocessing
197 By default, use a number of parallel processes equal to the number of
198 system CPUs (with the old value, 4, as a fallback).
200 Also replace the thread-based ThreadFetch() with the same multiprocessing
201 mechanism as above, for consistency.
203 diff --git a/slug.py b/slug.py
204 index 69bd3b9..da9c050 100755
207 @@ -7,26 +7,18 @@ import os
213 +import multiprocessing
219 +from multiprocessing import Pool as WorkerPool
221 from git_slug.gitconst import GITLOGIN, GITSERVER, GIT_REPO, GIT_REPO_PUSH, REMOTE_NAME, REMOTEREFS
222 from git_slug.gitrepo import GitRepo, GitRepoError
223 from git_slug.refsdata import GitArchiveRefsData, NoMatchedRepos, RemoteRefsError
226 - def __init__(self):
227 - self.lock = threading.Lock()
230 - def put(self, item):
232 - self.items.append(item)
234 class UnquoteConfig(configparser.ConfigParser):
235 def get(self, section, option, **kwargs):
236 value = super().get(section, option, **kwargs)
237 @@ -43,25 +35,15 @@ class DelAppend(argparse._AppendAction):
239 setattr(namespace, self.dest, item)
241 -class ThreadFetch(threading.Thread):
242 - def __init__(self, queue, output, pkgdir, depth=0):
243 - threading.Thread.__init__(self)
245 - self.packagesdir = pkgdir
247 - self.output = output
251 - (gitrepo, ref2fetch) = self.queue.get()
253 - (stdout, stderr) = gitrepo.fetch(ref2fetch, self.depth)
255 - print('------', gitrepo.gdir[:-len('.git')], '------\n' + stderr.decode('utf-8'))
256 - self.output.put(gitrepo)
257 - except GitRepoError as e:
258 - print('------', gitrepo.gdir[:-len('.git')], '------\n', e)
259 - self.queue.task_done()
262 + return multiprocessing.cpu_count()
263 + except NotImplementedError:
267 +def pool_worker_init():
268 + signal.signal(signal.SIGINT, signal.SIG_IGN)
270 def readconfig(path):
271 config = UnquoteConfig(delimiters='=', interpolation=None, strict=False)
272 @@ -114,18 +96,19 @@ def getrefs(*args):
276 +def fetch_package(gitrepo, ref2fetch, options):
278 + (stdout, stderr) = gitrepo.fetch(ref2fetch, options.depth)
280 + print('------', gitrepo.gdir[:-len('.git')], '------\n' + stderr.decode('utf-8'))
282 + except GitRepoError as e:
283 + print('------', gitrepo.gdir[:-len('.git')], '------\n', e)
285 def fetch_packages(options, return_all=False):
286 - fetch_queue = queue.Queue()
287 - updated_repos = Store()
288 - for i in range(options.jobs):
289 - t = ThreadFetch(fetch_queue, updated_repos, options.packagesdir, options.depth)
293 - signal.signal(signal.SIGINT, signal.SIG_DFL)
295 refs = getrefs(options.branch, options.repopattern)
296 print('Read remotes data')
298 for pkgdir in sorted(refs.heads):
299 gitdir = os.path.join(options.packagesdir, pkgdir, '.git')
300 if not os.path.isdir(gitdir):
301 @@ -143,9 +126,18 @@ def fetch_packages(options, return_all=False):
302 ref2fetch.append('+{}:{}/{}'.format(ref, REMOTEREFS, ref[len('refs/heads/'):]))
304 ref2fetch.append('refs/notes/*:refs/notes/*')
305 - fetch_queue.put((gitrepo, ref2fetch))
306 + args.append((gitrepo, ref2fetch, options))
309 + pool = WorkerPool(options.jobs, pool_worker_init)
311 + updated_repos = pool.starmap(fetch_package, args)
312 + except KeyboardInterrupt:
318 + updated_repos = list(filter(None, updated_repos))
322 @@ -158,26 +150,60 @@ def fetch_packages(options, return_all=False):
326 - return updated_repos.items
327 + return updated_repos
329 +def checkout_package(repo, options):
331 + repo.checkout(options.checkout)
332 + except GitRepoError as e:
333 + print('Problem with checking branch {} in repo {}: {}'.format(options.checkout, repo.gdir, e), file=sys.stderr)
335 def checkout_packages(options):
336 if options.checkout is None:
337 options.checkout = "/".join([REMOTE_NAME, options.branch[0]])
338 fetch_packages(options)
339 refs = getrefs(options.branch, options.repopattern)
341 for pkgdir in sorted(refs.heads):
342 - repo = GitRepo(os.path.join(options.packagesdir, pkgdir))
344 - repo.checkout(options.checkout)
345 - except GitRepoError as e:
346 - print('Problem with checking branch {} in repo {}: {}'.format(options.checkout, repo.gdir, e), file=sys.stderr)
347 + repos.append(GitRepo(os.path.join(options.packagesdir, pkgdir)))
348 + pool = WorkerPool(options.jobs)
350 + pool.starmap(checkout_package, zip(repos, [options] * len(repos)))
351 + except KeyboardInterrupt:
357 +def clone_package(repo, options):
359 + repo.checkout('master')
360 + except GitRepoError as e:
361 + print('Problem with checking branch master in repo {}: {}'.format(repo.gdir, e), file=sys.stderr)
363 def clone_packages(options):
364 - for repo in fetch_packages(options):
366 - repo.checkout('master')
367 - except GitRepoError as e:
368 - print('Problem with checking branch master in repo {}: {}'.format(repo.gdir, e), file=sys.stderr)
369 + repos = fetch_packages(options)
370 + pool = WorkerPool(options.jobs)
372 + pool.starmap(clone_package, zip(repos, [options] * len(repos)))
373 + except KeyboardInterrupt:
379 +def pull_package(gitrepo, options):
380 + directory = os.path.basename(gitrepo.wtree)
382 + (out, err) = gitrepo.commandexc(['rev-parse', '-q', '--verify', '@{u}'])
383 + sha1 = out.decode().strip()
384 + (out, err) = gitrepo.commandexc(['rebase', sha1])
385 + for line in out.decode().splitlines():
386 + print(directory,":",line)
387 + except GitRepoError as e:
388 + for line in e.args[0].splitlines():
389 + print("{}: {}".format(directory,line))
392 def pull_packages(options):
394 @@ -189,19 +215,14 @@ def pull_packages(options):
396 repolist = fetch_packages(options, False)
397 print('--------Pulling------------')
398 - for gitrepo in repolist:
399 - directory = os.path.basename(gitrepo.wtree)
401 - (out, err) = gitrepo.commandexc(['rev-parse', '-q', '--verify', '@{u}'])
402 - sha1 = out.decode().strip()
403 - (out, err) = gitrepo.commandexc(['rebase', sha1])
404 - for line in out.decode().splitlines():
405 - print(directory,":",line)
406 - except GitRepoError as e:
407 - for line in e.args[0].splitlines():
408 - print("{}: {}".format(directory,line))
411 + pool = WorkerPool(options.jobs, pool_worker_init)
413 + pool.starmap(pull_package, zip(repolist, [options] * len(repolist)))
414 + except KeyboardInterrupt:
420 def list_packages(options):
421 refs = getrefs(options.branch, options.repopattern)
422 @@ -213,7 +234,7 @@ common_options.add_argument('-d', '--packagesdir', help='local directory with gi
423 default=os.path.expanduser('~/rpm/packages'))
425 common_fetchoptions = argparse.ArgumentParser(add_help=False, parents=[common_options])
426 -common_fetchoptions.add_argument('-j', '--jobs', help='number of threads to use', default=4, type=int)
427 +common_fetchoptions.add_argument('-j', '--jobs', help='number of threads to use', default=cpu_count(), type=int)
428 common_fetchoptions.add_argument('repopattern', nargs='*', default = ['*'])
429 common_fetchoptions.add_argument('--depth', help='depth of fetch', default=0)
432 commit fac30722a98a4d6300822fd3f790ce1fa48e7d83
433 Author: Arkadiusz Miśkiewicz <arekm@maven.pl>
434 Date: Sun Nov 23 00:15:10 2014 +0100
436 check_remote(): Add support for the packed refs database.
438 check_remote() did not handle the git packed refs database. That made
439 fetch_packages() always fetch packages even if we already had
442 Supporting the packed refs database fixes this problem.
444 diff --git a/git_slug/gitrepo.py b/git_slug/gitrepo.py
445 index 5234deb..d9f88ee 100644
446 --- a/git_slug/gitrepo.py
447 +++ b/git_slug/gitrepo.py
448 @@ -82,12 +82,21 @@ class GitRepo:
449 'refs/notes/*:refs/notes/*'])
451 def check_remote(self, ref, remote=REMOTE_NAME):
452 + localref = EMPTYSHA1
453 ref = ref.replace(REFFILE, os.path.join('remotes', remote))
455 with open(os.path.join(self.gdir, ref), 'r') as f:
456 localref = f.readline().strip()
458 - localref = EMPTYSHA1
460 + with open(os.path.join(self.gdir, 'packed-refs')) as f:
462 + line_data = line.split()
463 + if len(line_data) == 2 and line_data[1] == ref:
464 + localref = line_data[0].strip()
470 def showfile(self, filename, ref="/".join([REMOTE_NAME, "master"])):