源码分析之python 文件遍历os.walk()方法
先上源码
def walk(top, topdown=True, onerror=None, followlinks=False):
    """Generate the directory tree rooted at *top*, one directory at a time.

    For every directory that can be listed, a 3-tuple
    ``(dirpath, dirnames, filenames)`` is yielded:

    * ``dirpath`` is the path of the directory being reported,
    * ``dirnames`` lists the names of its entries that are directories
      (symbolic links to directories included),
    * ``filenames`` lists the names of all its other entries.

    The names are bare entry names; join them with ``dirpath`` to obtain
    a usable path.

    Args:
        top: Directory to start from.
        topdown: If true, a directory is yielded before its
            sub-directories.  ``dirnames`` is only read after the yield,
            so the caller may modify it in place to prune or reorder the
            traversal.  If false, sub-directories are walked while the
            directory is being scanned and the directory itself is
            yielded last; editing ``dirnames`` then has no effect.
        onerror: Optional callable invoked with the ``OSError`` raised
            while listing a directory.  That directory is skipped either
            way; when ``onerror`` is None the error is silently ignored.
        followlinks: If false, symbolic links to directories are still
            reported in ``dirnames`` but are not descended into.

    NOTE: ``name``, ``scandir``, ``path`` and ``_dummy_scandir`` are
    expected to be globals of the enclosing module (this is the body of
    ``os.walk``); they are not defined inside this function.

    Example:

        import os
        from os.path import join, getsize
        for root, dirs, files in os.walk('python/Lib/email'):
            print(root, "consumes", end=" ")
            print(sum([getsize(join(root, name)) for name in files]), end=" ")
            print("bytes in", len(files), "non-directory files")
            if 'CVS' in dirs:
                dirs.remove('CVS')  # don't visit CVS directories
    """
    dirs = []
    nondirs = []

    # We may not have read permission for top, in which case we can't
    # get a list of the files the directory contains.  os.walk
    # always suppressed the exception then, rather than blow up for a
    # minor reason when (say) a thousand readable directories are still
    # left to visit.  That logic is copied here.
    try:
        if name == 'nt' and isinstance(top, bytes):
            scandir_it = _dummy_scandir(top)
        else:
            # Note that scandir is global in this module due
            # to earlier import-*.
            scandir_it = scandir(top)
        try:
            # Materialise the listing up front so the directory handle is
            # not held open across the yields below.
            entries = list(scandir_it)
        finally:
            # Release the handle deterministically, even if iteration
            # failed half-way.  Older scandir iterators have no close().
            close = getattr(scandir_it, 'close', None)
            if close is not None:
                close()
    except OSError as error:
        if onerror is not None:
            onerror(error)
        return

    for entry in entries:
        try:
            is_dir = entry.is_dir()
        except OSError:
            # If is_dir() raises an OSError, consider that the entry is not
            # a directory, same behaviour as os.path.isdir().
            is_dir = False

        if is_dir:
            dirs.append(entry.name)
        else:
            nondirs.append(entry.name)

        if not topdown and is_dir:
            # Bottom-up: recurse into sub-directory, but exclude symlinks to
            # directories if followlinks is False
            if followlinks:
                walk_into = True
            else:
                try:
                    is_symlink = entry.is_symlink()
                except OSError:
                    # If is_symlink() raises an OSError, consider that the
                    # entry is not a symbolic link, same behaviour as
                    # os.path.islink().
                    is_symlink = False
                walk_into = not is_symlink

            if walk_into:
                yield from walk(entry.path, topdown, onerror, followlinks)

    # Yield before recursion if going top down
    if topdown:
        yield top, dirs, nondirs

        # Recurse into sub-directories
        islink, join = path.islink, path.join
        for dirname in dirs:
            new_path = join(top, dirname)
            # Issue #23605: os.path.islink() is used instead of caching
            # entry.is_symlink() result during the loop on os.scandir() because
            # the caller can replace the directory entry during the "yield"
            # above.
            if followlinks or not islink(new_path):
                yield from walk(new_path, topdown, onerror, followlinks)
    else:
        # Yield after recursion if going bottom up
        yield top, dirs, nondirs
先说参数:
top, 要遍历的文件的根目录
topdown=True,遍历方式,true是自上而下,false是自下而上
onerror=None,错误处理回调:列目录时如果抛出OSError,就以该异常对象为参数调用onerror;为None时直接忽略错误。无论哪种情况,出错的目录都会被跳过
followlinks=False,是否进入软链接(符号链接)指向的目录继续遍历,false表示不进入(软链接目录仍会出现在dirs列表中,只是不再向下遍历),true表示进入
具体逻辑
第一部分try except用scandir列出根目录top下的所有条目,并将其转换成列表赋值给entries;如果列目录时抛出OSError(比如没有读权限),就调用onerror(如果传了的话),然后直接return,不再遍历这个目录
第二部分遍历entries,判断每一项是目录还是非目录文件,分别存放到不同的列表容器中(dirs和nondirs)。紧接着判断:如果topdown是false并且该项是目录,就继续进行下面的逻辑,这一部分是自下而上的遍历逻辑。其中如果followlinks为true,或者该目录不是软链接,就对它进行递归遍历;followlinks为false时,指向目录的软链接不会被进入。
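第三部分根据topdown是true还是false决定yield的时机:topdown为true时,先yield当前目录的(top, dirs, nondirs),再依次递归dirs中的子目录(所以调用方可以在yield之后原地修改dirs来跳过某些子目录);topdown为false时,子目录已经在第二部分递归完毕,这里最后才yield当前目录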
yield 以及 yield from生成器的学习笔记有机会再总结
补充说明topdown的两种情况,有大佬进行了测试,具体见下图
看完一次源码后坚信不疑自己是菜狗【手动狗头】