1 """ path.py - An object representing a path to a file or directory.
6 d = path('/home/guido/bin')
7 for f in d.files('*.py'):
10 This module requires Python 2.2 or later.
13 URL: http://www.jorendorff.com/articles/python/path
14 Author: Jason Orendorff <jason@jorendorff.com> (and others - see the url!)
20 # - Bug in write_text(). It doesn't support Universal newline mode.
21 # - Better error message in listdir() when self isn't a
22 # directory. (On Windows, the error message really sucks.)
23 # - Make sure everything has a good docstring.
24 # - Add methods for regex find and replace.
25 # - guess_content_type() method?
26 # - Perhaps support arguments to touch().
27 # - Could add split() and join() methods that generate warnings.
28 # - Note: __add__() technically has a bug, I think, where
29 # it doesn't play nice with other types that implement
30 # __radd__(). Test this.
32 from __future__ import generators
34 import sys, os, fnmatch, glob, shutil, codecs
39 # Pre-2.3 support. Are unicode filenames supported?
42 if os.path.supports_unicode_filenames:
44 except AttributeError:
47 # Pre-2.3 workaround for basestring.
51 basestring = (str, unicode)
53 # Universal newline support
55 if hasattr(file, 'newlines'):
60 """ Represents a filesystem path.
62 For documentation on individual methods, consult their
63 counterparts in os.path.
66 # --- Special Python methods.
69 return 'path(%s)' % _base.__repr__(self)
71 # Adding a path and a string yields a path.
def __add__(self, more):
    """ fp.__add__(more) == fp + more

    Concatenate this path with the string 'more' and return the
    result as a new path object.

    If 'more' is not something the underlying string type can be
    concatenated with, this returns NotImplemented instead of
    raising.  Python then tries more.__radd__(fp), so a type that
    implements __radd__() gets to decide what the result is; if
    there is no such method the usual TypeError is raised.
    """
    try:
        joined = _base.__add__(self, more)
    except TypeError:
        # The built-in string types raise TypeError from __add__()
        # for foreign operands rather than returning NotImplemented;
        # treat both outcomes the same way.
        joined = NotImplemented
    if joined is NotImplemented:
        return joined
    return path(joined)
def __radd__(self, other):
    """ fp.__radd__(other) == other + fp

    Concatenate 'other' with the plain string value of this path
    and return the result as a new path object.
    """
    combined = other + _base(self)
    return path(combined)
78 # The / operator joins paths.
def __div__(self, rel):
    """ fp.__div__(rel) == fp / rel == fp.joinpath(rel)

    Join this path and 'rel' using os.path.join(), which adds a
    separator character only if one is needed.  The result is
    returned as a new path object.
    """
    joined = os.path.join(self, rel)
    return path(joined)
87 # Make the / operator work even when true division is enabled.
91 """ Return the current working directory as a path object. """
92 return path(os.getcwd())
93 getcwd = staticmethod(getcwd)
96 # --- Operations on path strings.
def abspath(self):
    """ Return os.path.abspath() of this path, as a path object. """
    absolute = os.path.abspath(self)
    return path(absolute)
def normcase(self):
    """ Return os.path.normcase() of this path, as a path object. """
    folded = os.path.normcase(self)
    return path(folded)
def normpath(self):
    """ Return os.path.normpath() of this path, as a path object. """
    normalized = os.path.normpath(self)
    return path(normalized)
def realpath(self):
    """ Return os.path.realpath() of this path, as a path object. """
    resolved = os.path.realpath(self)
    return path(resolved)
def expanduser(self):
    """ Return os.path.expanduser() of this path, as a path object. """
    expanded = os.path.expanduser(self)
    return path(expanded)
def expandvars(self):
    """ Return os.path.expandvars() of this path, as a path object. """
    expanded = os.path.expandvars(self)
    return path(expanded)
def dirname(self):
    """ Return os.path.dirname() of this path, as a path object. """
    directory = os.path.dirname(self)
    return path(directory)
105 basename = os.path.basename
108 """ Clean up a filename by calling expandvars(),
109 expanduser(), and normpath() on it.
111 This is commonly everything needed to clean up a filename
112 read from a configuration file, for example.
114 return self.expandvars().expanduser().normpath()
116 def _get_namebase(self):
117 base, ext = os.path.splitext(self.name)
121 f, ext = os.path.splitext(_base(self))
124 def _get_drive(self):
125 drive, r = os.path.splitdrive(self)
130 """ This path's parent directory, as a new path object.
132 For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib')
136 basename, None, None,
137 """ The name of this file or directory without the full path.
139 For example, path('/usr/local/lib/libpython.so').name == 'libpython.so'
143 _get_namebase, None, None,
144 """ The same as path.name, but with one file extension stripped off.
146 For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz',
147 but path('/home/guido/python.tar.gz').namebase == 'python.tar'
151 _get_ext, None, None,
152 """ The file extension, for example '.py'. """)
155 _get_drive, None, None,
156 """ The drive specifier, for example 'C:'.
157 This is always empty on systems that don't use drive specifiers.
161 """ p.splitpath() -> Return (p.parent, p.name). """
162 parent, child = os.path.split(self)
163 return path(parent), child
def splitdrive(self):
    """ p.splitdrive() -> Return (p.drive, <the rest of p>).

    Divide this path at the end of its drive specifier using
    os.path.splitdrive().  The drive is returned as a path object;
    the remainder is returned just as os.path.splitdrive() produced
    it.  When there is no drive specifier the drive part is empty,
    which is always the case on Unix.
    """
    parts = os.path.splitdrive(self)
    return path(parts[0]), parts[1]
176 """ p.splitext() -> Return (p.stripext(), p.ext).
178 Split the filename extension from this path and return
179 the two parts. Either part may be empty.
181 The extension is everything from '.' to the end of the
182 last path segment. This has the property that if
183 (a, b) == p.splitext(), then a + b == p.
185 filename, ext = os.path.splitext(self)
186 return path(filename), ext
189 """ p.stripext() -> Remove one file extension from the path.
191 For example, path('/home/guido/python.tar.gz').stripext()
192 returns path('/home/guido/python.tar').
194 return self.splitext()[0]
196 if hasattr(os.path, 'splitunc'):
198 unc, rest = os.path.splitunc(self)
199 return path(unc), rest
201 def _get_uncshare(self):
202 unc, r = os.path.splitunc(self)
206 _get_uncshare, None, None,
207 """ The UNC mount point for this path.
208 This is empty for paths on local drives. """)
def joinpath(self, *args):
    """ Join this path with each of the given components, in order.

    The work is done by os.path.join(), so a separator character
    (os.sep) is added only where one is needed.  Returns a new
    path object.
    """
    joined = os.path.join(self, *args)
    return path(joined)
218 """ Return a list of the path components in this path.
220 The first item in the list will be a path. Its value will be
221 either os.curdir, os.pardir, empty, or the root directory of
222 this path (for example, '/' or 'C:\\'). The other items in
223 the list will be strings.
225 path.path.joinpath(*result) will yield the original path.
229 while loc != os.curdir and loc != os.pardir:
231 loc, child = prev.splitpath()
240 """ Return this path as a relative path,
241 based from the current working directory.
243 cwd = path(os.getcwd())
244 return cwd.relpathto(self)
246 def relpathto(self, dest):
247 """ Return a relative path from self to dest.
249 If there is no relative path from self to dest, for example if
250 they reside on different drives in Windows, then this returns
253 origin = self.abspath()
254 dest = path(dest).abspath()
256 orig_list = origin.normcase().splitall()
257 # Don't normcase dest! We want to preserve the case.
258 dest_list = dest.splitall()
260 if orig_list[0] != os.path.normcase(dest_list[0]):
261 # Can't get here from there.
264 # Find the location where the two paths start to differ.
266 for start_seg, dest_seg in zip(orig_list, dest_list):
267 if start_seg != os.path.normcase(dest_seg):
271 # Now i is the point where the two paths diverge.
272 # Need a certain number of "os.pardir"s to work up
273 # from the origin to the point of divergence.
274 segments = [os.pardir] * (len(orig_list) - i)
275 # Need to add the diverging part of dest_list.
276 segments += dest_list[i:]
277 if len(segments) == 0:
278 # If they happen to be identical, use os.curdir.
279 return path(os.curdir)
281 return path(os.path.join(*segments))
284 # --- Listing, searching, walking, and matching
def listdir(self, pattern=None):
    """ D.listdir() -> List of items in this directory.

    Each element of the returned list is this directory joined
    (with the / operator) to the name of one of its entries, as
    reported by os.listdir().

    pattern - Optional filename pattern.  When given, only the
        entries whose names fnmatch.filter() accepts are listed.

    Use D.files() or D.dirs() instead if you want a listing of
    just files or just subdirectories.
    """
    if pattern is None:
        names = os.listdir(self)
    else:
        names = fnmatch.filter(os.listdir(self), pattern)
    children = []
    for name in names:
        children.append(self / name)
    return children
def dirs(self, pattern=None):
    """ D.dirs() -> List of this directory's subdirectories.

    Returns the items of self.listdir(pattern) for which isdir()
    is true, in the order listdir() produced them.  Subdirectories
    are not entered (but see path.walkdirs).

    pattern - Optional filename pattern, handed to listdir() so
        that only matching names are considered.  For example,
        d.dirs('build-*').
    """
    subdirs = []
    for entry in self.listdir(pattern):
        if entry.isdir():
            subdirs.append(entry)
    return subdirs
def files(self, pattern=None):
    """ D.files() -> List of the files in this directory.

    Returns the items of self.listdir(pattern) for which isfile()
    is true, in the order listdir() produced them.  Subdirectories
    are not entered (see path.walkfiles).

    pattern - Optional filename pattern, handed to listdir() so
        that only matching names are considered.
    """
    plain_files = []
    for entry in self.listdir(pattern):
        if entry.isfile():
            plain_files.append(entry)
    return plain_files
328 def walk(self, pattern=None):
329 """ D.walk() -> iterator over files and subdirs, recursively.
331 The iterator yields path objects naming each child item of
332 this directory and its descendants. This requires that
335 This performs a depth-first traversal of the directory tree.
336 Each directory is returned just before all its children.
338 for child in self.listdir():
339 if pattern is None or child.fnmatch(pattern):
342 for item in child.walk(pattern):
345 def walkdirs(self, pattern=None):
346 """ D.walkdirs() -> iterator over subdirs, recursively.
348 With the optional 'pattern' argument, this yields only
349 directories whose names match the given pattern. For
350 example, mydir.walkdirs('*test') yields only directories
351 with names ending in 'test'.
353 for child in self.dirs():
354 if pattern is None or child.fnmatch(pattern):
356 for subsubdir in child.walkdirs(pattern):
359 def walkfiles(self, pattern=None):
360 """ D.walkfiles() -> iterator over files in D, recursively.
362 The optional argument, pattern, limits the results to files
363 with names that match the pattern. For example,
364 mydir.walkfiles('*.tmp') yields only files with the .tmp
367 for child in self.listdir():
369 if pattern is None or child.fnmatch(pattern):
372 for f in child.walkfiles(pattern):
def fnmatch(self, pattern):
    """ Return True if self.name matches the given pattern.

    pattern - A filename pattern with wildcards, in the form
        accepted by fnmatch.fnmatch() (for example '*.py').
    """
    name = self.name
    return fnmatch.fnmatch(name, pattern)
def glob(self, pattern):
    """ Return a list of path objects that match the pattern.

    pattern - a path relative to this directory, with wildcards.
        It is joined onto this path and the result is expanded
        with glob.glob().

    For example, path('/users').glob('*/bin/*') returns a list
    of all the files users have in their bin directories.
    """
    full_pattern = _base(self / pattern)
    matches = glob.glob(full_pattern)
    return [path(match) for match in matches]
394 # --- Reading or writing an entire file at once.
def open(self, mode='r'):
    """ Open the file at this path and return the file object.

    mode - The mode string handed to the built-in file type.
        The default is 'r'.
    """
    handle = file(self, mode)
    return handle
401 """ Open this file, read all bytes, return them as a string. """
408 def write_bytes(self, bytes, append=False):
409 """ Open this file and write the given bytes to it.
411 Default behavior is to overwrite any existing file.
412 Call this with write_bytes(bytes, append=True) to append instead.
424 def text(self, encoding=None, errors='strict'):
425 """ Open this file, read it in, return the content as a string.
427 This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r'
428 are automatically translated to '\n'.
432 encoding - The Unicode encoding (or character set) of
433 the file. If present, the content of the file is
434 decoded and returned as a unicode object; otherwise
435 it is returned as an 8-bit str.
436 errors - How to handle Unicode errors; see help(str.decode)
437 for the options. Default is 'strict'.
441 f = self.open(_textmode)
448 f = codecs.open(self, 'r', encoding, errors)
449 # (Note - Can't use 'U' mode here, since codecs.open
450 # doesn't support 'U' mode, even in Python 2.3.)
455 return (t.replace(u'\r\n', u'\n')
456 .replace(u'\r\x85', u'\n')
457 .replace(u'\r', u'\n')
458 .replace(u'\x85', u'\n')
459 .replace(u'\u2028', u'\n'))
461 def write_text(self, text, encoding=None, errors='strict', linesep=os.linesep, append=False):
462 """ Write the given text to this file.
464 The default behavior is to overwrite any existing file;
465 to append instead, use the 'append=True' keyword argument.
467 There are two differences between path.write_text() and
468 path.write_bytes(): newline handling and Unicode handling.
473 - text - str/unicode - The text to be written.
475 - encoding - str - The Unicode encoding that will be used.
476 This is ignored if 'text' isn't a Unicode string.
478 - errors - str - How to handle Unicode encoding errors.
479 Default is 'strict'. See help(unicode.encode) for the
480 options. This is ignored if 'text' isn't a Unicode
483 - linesep - keyword argument - str/unicode - The sequence of
484 characters to be used to mark end-of-line. The default is
485 os.linesep. You can also specify None; this means to
486 leave all newlines as they are in 'text'.
488 - append - keyword argument - bool - Specifies what to do if
489 the file already exists (True: append to the end of it;
490 False: overwrite it.) The default is False.
493 --- Newline handling.
495 write_text() converts all standard end-of-line sequences
496 ('\n', '\r', and '\r\n') to your platform's default end-of-line
497 sequence (see os.linesep; on Windows, for example, the
498 end-of-line marker is '\r\n').
500 If you don't like your platform's default, you can override it
501 using the 'linesep=' keyword argument. If you specifically want
502 write_text() to preserve the newlines as-is, use 'linesep=None'.
504 This applies to Unicode text the same as to 8-bit text, except
505 there are three additional standard Unicode end-of-line sequences:
506 u'\x85', u'\r\x85', and u'\u2028'.
508 (This is slightly different from when you open a file for
509 writing with fopen(filename, "w") in C or file(filename, 'w')
515 If 'text' isn't Unicode, then apart from newline handling, the
516 bytes are written verbatim to the file. The 'encoding' and
517 'errors' arguments are not used and must be omitted.
519 If 'text' is Unicode, it is first converted to bytes using the
520 specified 'encoding' (or the default encoding if 'encoding'
521 isn't specified). The 'errors' argument applies only to this
525 if isinstance(text, unicode):
526 if linesep is not None:
527 # Convert all standard end-of-line sequences to
528 # ordinary newline characters.
529 text = (text.replace(u'\r\n', u'\n')
530 .replace(u'\r\x85', u'\n')
531 .replace(u'\r', u'\n')
532 .replace(u'\x85', u'\n')
533 .replace(u'\u2028', u'\n'))
534 text = text.replace(u'\n', linesep)
536 encoding = sys.getdefaultencoding()
537 bytes = text.encode(encoding, errors)
539 # It is an error to specify an encoding if 'text' is
541 assert encoding is None
543 if linesep is not None:
544 text = (text.replace('\r\n', '\n')
545 .replace('\r', '\n'))
546 bytes = text.replace('\n', linesep)
548 self.write_bytes(bytes, append)
550 def lines(self, encoding=None, errors='strict', retain=True):
551 """ Open this file, read all lines, return them in a list.
554 encoding - The Unicode encoding (or character set) of
555 the file. The default is None, meaning the content
556 of the file is read as 8-bit characters and returned
557 as a list of (non-Unicode) str objects.
558 errors - How to handle Unicode errors; see help(str.decode)
559 for the options. Default is 'strict'
560 retain - If true, retain newline characters; but all newline
561 character combinations ('\r', '\n', '\r\n') are
562 translated to '\n'. If false, newline characters are
563 stripped off. Default is True.
565 This uses 'U' mode in Python 2.3 and later.
567 if encoding is None and retain:
568 f = self.open(_textmode)
574 return self.text(encoding, errors).splitlines(retain)
576 def write_lines(self, lines, encoding=None, errors='strict',
577 linesep=os.linesep, append=False):
578 """ Write the given lines of text to this file.
580 By default this overwrites any existing file at this path.
582 This puts a platform-specific newline sequence on every line.
585 lines - A list of strings.
587 encoding - A Unicode encoding to use. This applies only if
588 'lines' contains any Unicode strings.
590 errors - How to handle errors in Unicode encoding. This
591 also applies only to Unicode strings.
593 linesep - The desired line-ending. This line-ending is
594 applied to every line. If a line already has any
595 standard line ending ('\r', '\n', '\r\n', u'\x85',
596 u'\r\x85', u'\u2028'), that will be stripped off and
597 this will be used instead. The default is os.linesep,
598 which is platform-dependent ('\r\n' on Windows, '\n' on
599 Unix, etc.) Specify None to write the lines as-is,
600 like file.writelines().
602 Use the keyword argument append=True to append lines to the
603 file. The default is to overwrite the file. Warning:
604 When you use this with Unicode data, if the encoding of the
605 existing data in the file is different from the encoding
606 you specify with the encoding= parameter, the result is
607 mixed-encoding data, which can really confuse someone trying
608 to read the file later.
617 isUnicode = isinstance(line, unicode)
618 if linesep is not None:
619 # Strip off any existing line-end and add the
620 # specified linesep string.
622 if line[-2:] in (u'\r\n', u'\x0d\x85'):
624 elif line[-1:] in (u'\r', u'\n',
628 if line[-2:] == '\r\n':
630 elif line[-1:] in ('\r', '\n'):
635 encoding = sys.getdefaultencoding()
636 line = line.encode(encoding, errors)
642 # --- Methods for querying the filesystem.
644 exists = os.path.exists
645 isabs = os.path.isabs
646 isdir = os.path.isdir
647 isfile = os.path.isfile
648 islink = os.path.islink
649 ismount = os.path.ismount
651 if hasattr(os.path, 'samefile'):
652 samefile = os.path.samefile
654 getatime = os.path.getatime
656 getatime, None, None,
657 """ Last access time of the file. """)
659 getmtime = os.path.getmtime
661 getmtime, None, None,
662 """ Last-modified time of the file. """)
664 if hasattr(os.path, 'getctime'):
665 getctime = os.path.getctime
667 getctime, None, None,
668 """ Creation time of the file. """)
670 getsize = os.path.getsize
673 """ Size of the file, in bytes. """)
675 if hasattr(os, 'access'):
def access(self, mode):
    """ Return true if current user has access to this path.

    mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK.
        It is passed to os.access() together with this path.
    """
    allowed = os.access(self, mode)
    return allowed
684 """ Perform a stat() system call on this path. """
688 """ Like path.stat(), but do not follow symbolic links. """
689 return os.lstat(self)
691 if hasattr(os, 'statvfs'):
693 """ Perform a statvfs() system call on this path. """
694 return os.statvfs(self)
696 if hasattr(os, 'pathconf'):
def pathconf(self, name):
    """ Return the os.pathconf() value called 'name' for this path.

    name - The configuration value to look up; it is passed to
        os.pathconf() unchanged.
    """
    value = os.pathconf(self, name)
    return value
701 # --- Modifying operations on files and directories
def utime(self, times):
    """ Set the access and modified times of this file.

    times - Passed to os.utime() unchanged, together with this
        path.
    """
    set_times = os.utime
    set_times(self, times)
707 def chmod(self, mode):
710 if hasattr(os, 'chown'):
def chown(self, uid, gid):
    """ Change the owner and group of this path using os.chown().

    uid, gid - Passed to os.chown() unchanged, together with
        this path.
    """
    change_owner = os.chown
    change_owner(self, uid, gid)
714 def rename(self, new):
def renames(self, new):
    """ Rename this path to 'new' using os.renames().

    new - The new name; it is passed to os.renames() unchanged,
        together with this path.
    """
    recursive_rename = os.renames
    recursive_rename(self, new)
721 # --- Create/delete operations on directories
723 def mkdir(self, mode=0777):
726 def makedirs(self, mode=0777):
727 os.makedirs(self, mode)
732 def removedirs(self):
736 # --- Modifying operations on files
739 """ Set the access/modified times of this file to the current time.
740 Create the file if it does not exist.
742 fd = os.open(self, os.O_WRONLY | os.O_CREAT, 0666)
755 if hasattr(os, 'link'):
def link(self, newpath):
    """ Create a hard link at 'newpath', pointing to this file.

    newpath - Where the new link is created; it is passed to
        os.link() as the destination, with this path as the
        source.
    """
    make_link = os.link
    make_link(self, newpath)
760 if hasattr(os, 'symlink'):
def symlink(self, newlink):
    """ Create a symbolic link at 'newlink', pointing here.

    newlink - Where the new symbolic link is created; it is
        passed to os.symlink() as the link name, with this path
        as the target.
    """
    make_symlink = os.symlink
    make_symlink(self, newlink)
765 if hasattr(os, 'readlink'):
767 """ Return the path to which this symbolic link points.
769 The result may be an absolute or a relative path.
771 return path(os.readlink(self))
773 def readlinkabs(self):
774 """ Return the path to which this symbolic link points.
776 The result is always an absolute path.
782 return (self.parent / p).abspath()
785 # --- High-level functions from shutil
787 copyfile = shutil.copyfile
788 copymode = shutil.copymode
789 copystat = shutil.copystat
792 copytree = shutil.copytree
793 if hasattr(shutil, 'move'):
795 rmtree = shutil.rmtree
798 # --- Special stuff from os
800 if hasattr(os, 'chroot'):
804 if hasattr(os, 'startfile'):