vlambda博客
学习文章列表

源码分析之python 文件遍历os.walk()方法

先上源码

def walk(top, topdown=True, onerror=None, followlinks=False):
    """Directory tree generator.

    For every directory in the tree rooted at *top* (including *top*
    itself), yield a 3-tuple ``(top, dirs, nondirs)`` where ``top`` is the
    path of the directory being visited, ``dirs`` is the list of names of
    its sub-directories and ``nondirs`` is the list of names of all its
    other entries.

    Args:
        top: Path of the directory to start walking from.
        topdown: If true, a directory's tuple is yielded before its
            sub-directories are visited, and the sub-directories visited
            are taken from ``dirs`` *after* the yield, so the caller may
            edit ``dirs`` in place to prune the walk.  If false, the
            sub-directories are visited first and the directory's own
            tuple is yielded last.
        onerror: Optional callable invoked with the ``OSError`` instance
            when listing a directory fails.  When it is ``None`` the error
            is silently ignored.  In both cases that directory is skipped.
        followlinks: If true, also descend into sub-directories that are
            symbolic links; if false (the default) they are listed in
            ``dirs`` but not visited.

    Yields:
        ``(top, dirs, nondirs)`` tuples as described above.

    Example:

    import os
    from os.path import join, getsize
    for root, dirs, files in os.walk('python/Lib/email'):
        print(root, "consumes", end="")
        print(sum([getsize(join(root, name)) for name in files]), end="")
        print("bytes in", len(files), "non-directory files")
        if 'CVS' in dirs:
            dirs.remove('CVS')  # don't visit CVS directories

    """
    dirs = []
    nondirs = []

    # We may not have read permission for top, in which case we can't
    # get a list of the files the directory contains.  os.walk
    # always suppressed the exception then, rather than blow up for a
    # minor reason when (say) a thousand readable directories are still
    # left to visit.  That logic is copied here.
    try:
        if name == 'nt' and isinstance(top, bytes):
            scandir_it = _dummy_scandir(top)
        else:
            # Note that scandir is global in this module due
            # to earlier import-*.
            scandir_it = scandir(top)
        # Materialise the listing up front so that every OSError raised
        # while reading the directory is handled by the except clause.
        entries = list(scandir_it)
    except OSError as error:
        if onerror is not None:
            onerror(error)
        return

    for entry in entries:
        try:
            is_dir = entry.is_dir()
        except OSError:
            # If is_dir() raises an OSError, consider that the entry is not
            # a directory, same behaviour than os.path.isdir().
            is_dir = False

        if is_dir:
            dirs.append(entry.name)
        else:
            nondirs.append(entry.name)

        if not topdown and is_dir:
            # Bottom-up: recurse into sub-directory, but exclude symlinks to
            # directories if followlinks is False
            if followlinks:
                walk_into = True
            else:
                try:
                    is_symlink = entry.is_symlink()
                except OSError:
                    # If is_symlink() raises an OSError, consider that the
                    # entry is not a symbolic link, same behaviour than
                    # os.path.islink().
                    is_symlink = False
                walk_into = not is_symlink

            if walk_into:
                yield from walk(entry.path, topdown, onerror, followlinks)

    # Yield before recursion if going top down
    if topdown:
        yield top, dirs, nondirs

        # Recurse into sub-directories
        islink, join = path.islink, path.join
        for dirname in dirs:
            new_path = join(top, dirname)
            # Issue #23605: os.path.islink() is used instead of caching
            # entry.is_symlink() result during the loop on os.scandir() because
            # the caller can replace the directory entry during the "yield"
            # above.
            if followlinks or not islink(new_path):
                yield from walk(new_path, topdown, onerror, followlinks)
    else:
        # Yield after recursion if going bottom up
        yield top, dirs, nondirs

先说参数:

top, 要遍历的文件的根目录

topdown=True,遍历方式,true是自上而下,false是自下而上

onerror=None,错误回调函数:列出目录时如果抛出 OSError,就以该异常对象为参数调用 onerror;为 None 时直接忽略错误。无论哪种情况,出错的目录都会被跳过

followlinks=False,是否跟随软链接:False 表示遍历时不进入指向目录的软链接,True 表示会进入软链接指向的目录继续遍历


具体逻辑

第一部分的 try/except 用 scandir 列出根目录 top 下的所有条目,并转换成列表赋值给 entries;如果过程中抛出 OSError(例如没有读权限),则调用 onerror(如果传入了的话)后直接 return,不再继续遍历这个目录


第二部分遍历 entries,判断每个条目是目录还是非目录文件,分别存放到 dirs 和 nondirs 两个列表中。紧接着判断:如果 topdown 是 False 并且该条目是目录,就继续进行下面的逻辑,这一部分是自下而上的遍历逻辑。其中如果 followlinks 为 True,或者该目录不是软链接,就对它进行递归遍历;如果是软链接且 followlinks 为 False,则不进入。


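第三部分根据 topdown 决定 yield 的时机:topdown 为 True 时,先 yield 当前目录的 (top, dirs, nondirs),再依次递归进入 dirs 中的各个子目录(所以调用方可以在 yield 之后修改 dirs 来跳过某些目录);topdown 为 False 时,子目录已经在第二部分递归完毕,最后才 yield 当前目录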


yield 以及 yield from生成器的学习笔记有机会再总结


补充说明topdown的两种情况,有大佬进行了测试,具体见下图


看完一次源码后坚信不疑自己是菜狗【手动狗头】