Source code for snsxt.util.find

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Functions for finding files and dirs
"""

import logging
# Module-level logger used by the functions in this file; registered under the fixed name "find".
logger = logging.getLogger("find")
# Emitted once, when the module is first imported.
logger.debug("loading find module")

import os
import sys
import itertools
import fnmatch
from collections import defaultdict

def find(search_dir, inclusion_patterns = ('*',), exclusion_patterns = (), search_type = 'all', num_limit = None, level_limit = None, match_mode = "any"):
    """
    Function to search for files and directories

    Parameters
    ----------
    search_dir: str
        path to the directory in which to search for files and subdirectories
    inclusion_patterns: list or tuple
        a list or tuple of patterns to match files/dirs against for inclusion in match output
    exclusion_patterns: list or tuple
        a list or tuple of patterns to match files/dirs against for exclusion from match output
    search_type: str
        'all', 'file', or 'dir'; type of items to find
    num_limit: int
        the maximum number of matches to return; use `None` for no limit.
        Values below zero are treated as zero.
    level_limit: int
        the number of directory levels to recurse; 0 is parent dir only
    match_mode: str
        'any' or 'all'; matches any of the provided inclusion_patterns, or all of them

    Returns
    -------
    list
        a list of matching file or directory paths
    """
    matches = find_gen(search_dir = search_dir,
                       inclusion_patterns = inclusion_patterns,
                       exclusion_patterns = exclusion_patterns,
                       search_type = search_type,
                       level_limit = level_limit,
                       match_mode = match_mode)
    if num_limit is not None:
        # islice stops consuming the generator as soon as the limit is hit, so
        # the rest of the directory tree is never walked, and exactly
        # `num_limit` items (not `num_limit + 1`) are returned.
        matches = itertools.islice(matches, max(int(num_limit), 0))
    return list(matches)
def find_gen(search_dir, inclusion_patterns = ('*',), exclusion_patterns = (), search_type = 'all', level_limit = None, match_mode = "any"):
    """
    Generator function to return file matches. Used internally by `find`

    Parameters
    ----------
    search_dir: str
        path to the directory in which to search for files and subdirectories
    inclusion_patterns: list or tuple
        a list or tuple of patterns to match files/dirs against for inclusion in match output
    exclusion_patterns: list or tuple
        a list or tuple of patterns to match files/dirs against for exclusion from match output
    search_type: str
        'all', 'file', or 'dir'; type of items to find
    level_limit: int
        the number of directory levels to recurse; 0 searches only `search_dir`
        itself, `None` means no limit
    match_mode: str
        'any' or 'all'; matches any of the provided inclusion_patterns, or all of them

    Yields
    ------
    str
        path of each matching item, built as `os.path.join(root, item)`

    Raises
    ------
    SystemExit
        if `search_type` is not one of 'all', 'file' or 'dir' (raised while
        walking, after an error has been logged)
    """
    sep = os.path.sep
    top = search_dir.rstrip(sep)
    if search_dir and not top:
        # the path consisted only of separators (e.g. "/"); stripping them all
        # would leave "" and os.walk("") yields nothing, so keep the root
        top = sep

    # convert once instead of on every iteration of the walk
    max_depth = None if level_limit is None else int(level_limit)

    for root, dirs, files in os.walk(top):
        # choose which items to search
        if search_type == 'all':
            items = dirs + files
        elif search_type == 'dir':
            items = dirs
        elif search_type == 'file':
            items = files
        else:
            logger.error("Search type '{0}' not valid, exiting script".format(search_type))
            # exit with a non-zero status so the failure is visible to the caller's shell
            sys.exit(1)

        # yield the results
        for item in super_filter(names = items,
                                 inclusion_patterns = inclusion_patterns,
                                 exclusion_patterns = exclusion_patterns,
                                 match_mode = match_mode):
            yield os.path.join(root, item)

        # check for a level limit
        if max_depth is not None:
            # depth of `root` below the starting directory; measured on the part
            # of the path after `top` so it is also correct when `top` is the
            # filesystem root
            below_top = root[len(top):].strip(sep)
            depth = below_top.count(sep) + 1 if below_top else 0
            if depth >= max_depth:
                # emptying `dirs` in place stops os.walk from descending further
                del dirs[:]
def super_filter(names, inclusion_patterns = ('*',), exclusion_patterns = (), match_mode = "any"):
    """
    Enhanced version of `fnmatch.filter()` that accepts multiple inclusion and exclusion patterns.

    Filter the input names by choosing only those that are matched by the
    `inclusion_patterns` (any or all of them, depending on `match_mode`)
    _and_ not by any pattern in `exclusion_patterns`.

    Parameters
    ----------
    names: iterable
        hashable names (e.g. file or directory names) to filter
    inclusion_patterns: list or tuple
        patterns a name must match to be kept
    exclusion_patterns: list or tuple
        patterns that remove a name from the output; a match on any single
        pattern is enough to exclude it
    match_mode: str
        'any' or 'all'; applies to `inclusion_patterns` only

    Yields
    ------
    each surviving name once, in the order it first appears in `names`

    Adapted from: https://codereview.stackexchange.com/questions/74713/filtering-with-multiple-inclusion-and-exclusion-patterns
    """
    # `names` is traversed twice; materialize it so one-shot iterators work too
    names = list(names)

    # exclusion is always "any": a name matching a single exclusion pattern is dropped
    excluded = set(multi_filter(names, patterns = exclusion_patterns, match_mode = "any"))

    # keep the input order (a plain set difference would yield in arbitrary
    # order) while still emitting each distinct name only once
    emitted = set()
    for name in multi_filter(names, patterns = inclusion_patterns, match_mode = match_mode):
        if name in excluded or name in emitted:
            continue
        emitted.add(name)
        yield name
def multi_filter(names, patterns, match_mode = "any"):
    """
    Generator function which yields the names that match one or more of the patterns.

    Only the basename of each name is compared against the patterns.

    Parameters
    ----------
    names: iterable
        names to test
    patterns: str, list or tuple
        a single pattern string, or a collection of patterns; an empty
        collection matches nothing
    match_mode: str
        'any' to accept a name matching at least one pattern, 'all' to require
        every pattern to match; ignored when `patterns` is a single string.
        Any other value matches nothing.
    """
    # pick the combining function once; None means "nothing can match"
    if isinstance(patterns, str):
        # in case a single string was passed as a pattern
        pattern_group, combine = (patterns,), any
    else:
        pattern_group = patterns
        if match_mode == 'any':
            combine = any
        elif match_mode == 'all':
            combine = all
        else:
            combine = None

    for name in names:
        base = os.path.basename(name)
        if not pattern_group or combine is None:
            continue
        if combine(fnmatch.fnmatch(base, candidate) for candidate in pattern_group):
            yield name
#
# for pattern in patterns:
#     if fnmatch.fnmatch(name, pattern):
#         yield name

# deprecated functions below
def find_files(search_dir, search_filename):
    """
    deprecated function that returns the paths to all files matching the supplied filename in the search dir

    Parameters
    ----------
    search_dir: str
        directory that is walked recursively
    search_filename: str
        exact file name to look for (compared with `==`, no wildcards)

    Returns
    -------
    list
        paths of every file whose name equals `search_filename`
    """
    import os
    logger.debug('Now searching for file "{0}" in directory {1}'.format(search_filename, search_dir))
    hits = [
        os.path.join(parent, entry)
        for parent, _subdirs, entries in os.walk(search_dir)
        for entry in entries
        if entry == search_filename
    ]
    logger.debug('Found {0} matches'.format(len(hits)))
    return hits
def walklevel(some_dir, level=1):
    """
    deprecated function that recursively searches a directory for all items up to a given depth

    Parameters
    ----------
    some_dir: str
        path to an existing directory to walk
    level: int
        number of directory levels below `some_dir` to descend into; 0 lists
        only the direct contents of `some_dir`

    Yields
    ------
    str
        path of each directory and file found, directories first within each
        visited directory

    Raises
    ------
    AssertionError
        if `some_dir` is not an existing directory

    Examples
    ----------
    Example usage::

        file_list = []
        for item in pf.walklevel(some_dir):
            if (item.endswith('my_file.txt') and os.path.isfile(item) ):
                file_list.append(item)

    """
    import os
    sep = os.path.sep
    top = some_dir.rstrip(sep)
    if some_dir and not top:
        # the path consisted only of separators (e.g. "/"); stripping them all
        # would leave "", which is not a directory, so keep the root
        top = sep
    # kept as an assert so callers see the same exception type as before
    assert os.path.isdir(top)

    for root, dirs, files in os.walk(top):
        for dir_name in dirs:
            yield os.path.join(root, dir_name)
        for file_name in files:
            yield os.path.join(root, file_name)

        # depth of `root` below the starting directory; measured on the part of
        # the path after `top` so it is also correct when `top` is the
        # filesystem root
        below_top = root[len(top):].strip(sep)
        depth = below_top.count(sep) + 1 if below_top else 0
        if depth >= level:
            # emptying `dirs` in place stops os.walk from descending further
            del dirs[:]