Python - 辅助函数库misc-utils

Author： AIHGF
发布时间：January 11, 2022
246views
No comments
4968 words
Categories： Python

Github - misads/misc_utils

安装：

pip install utils-misc

实现的常用函数有：

import misc_utils as utils

1. 执行shell 命令cmd

1.1. 定义

misc_utils.misc_utils.cmd(shell)

1.2. 实现

import os 

def cmd(shell):
    """Run a shell command and return its standard output as a list of lines.

    Args:
        shell(str): command line passed to ``os.popen``.

    Returns:
        list[str]: output lines with the trailing newline removed.

    Example
        >>> cmd('echo hello')  # ['hello']

    """
    # NOTE(review): the string is executed by the system shell as-is;
    # never build it from untrusted input.
    # The context manager closes the pipe deterministically instead of
    # leaving it open until garbage collection.
    with os.popen(shell) as pipe:
        return [line.rstrip('\n') for line in pipe]

1.3. 示例

utils.cmd('ls')
utils.cmd('nvidia-smi')

2. 打印自定义颜色的文本color_print

2.1. 定义

misc_utils.misc_utils.color_print(text='', color=0, end='\n')

参数：

[1] - text - 待打印文本, str

[2] - color - 颜色，int，候选项：[0 black; 1 red; 2 green; 3 yellow; 4 blue; 5 magenta (like light red); 6 cyan (like light blue); 7 white]

[3] - end - 待颜色文本后的结束符, str

2.2. 实现

def color_print(text='', color=0, end='\n'):
    """Print text wrapped in a bold ANSI foreground-color escape sequence.

    The emitted sequence is ``ESC[1;3<color>m`` + text + ``ESC[0m`` + end.

    Args:
        text(str): text to print (converted with ``str`` by ``print``).
        color(int): last digit of the ANSI foreground code ``3x``:
            * 0       black
            * 1       red
            * 2       green
            * 3       yellow
            * 4       blue
            * 5       magenta (like light red)
            * 6       cyan (like light blue)
            * 7       white
        end(str): string printed after the color-reset sequence.

    Example
        >>> color_print('yellow', 3)

    """
    # A single print call keeps the escape codes and the text together in
    # one write instead of three separate ones.
    print('\033[1;3%dm' % color, text, '\033[0m', sep='', end=end)

2.3. 示例

color_print('yellow', 3)
color_print('yellow', 2)
color_print('test', 0)

3. 加载文本文件并解析内容为字符串列表file_lines

3.1. 定义

misc_utils.misc_utils.file_lines(filename, prefix='', offset=0, max_num=0)

参数：

[1] - filename - 文件名

[2] - prefix - 每一项前插入的前缀

[3] - offset - 行的偏移量

[4] - max_num - 读取的最大行数，0或负数表示不限制.

3.2. 实现

def file_lines(filename, prefix='', offset=0, max_num=0):
    """Read a text file and return its lines as a list of strings.

    Args:
        filename (str): path of the file to read.
        prefix (str): string prepended to every returned line.
        offset (int): number of leading lines to skip.
        max_num (int): maximum number of lines to return; zero or a
            negative value means no limit.

    Returns:
        list[str]: the selected lines, without their trailing newline.
    """
    limited = max_num > 0
    collected = []
    with open(filename, 'r') as handle:
        # Consume the lines that come before the requested offset.
        for _ in range(offset):
            handle.readline()
        for raw in handle:
            if limited and len(collected) >= max_num:
                break
            collected.append(prefix + raw.rstrip('\n'))
    return collected

4. 格式化数字format_num

4.1. 定义

misc_utils.misc_utils.format_num(num: int) → str

每三位添加一个逗号，返回字符串

4.2. 实现

def format_num(num: int) -> str:
    """Insert a comma between every group of three digits.

    A leading minus sign is kept in front of the grouped digits, so
    ``-123`` is rendered as ``-123`` rather than ``-,123``.

    Args:
        num(int): a number.

    Returns:
        str: the number with thousands separators.

    Examples
        >>> format_num(10000)  # 10,000
        >>> format_num(123456789)  # 123,456,789
        >>> format_num(-1234567)  # -1,234,567

    """
    text = str(num)
    # Split off the sign so it never takes part in the digit grouping.
    if text.startswith('-'):
        sign, digits = '-', text[1:]
    else:
        sign, digits = '', text

    # The first group holds the digits left over once full groups of
    # three are taken from the right-hand side.
    head = len(digits) % 3
    groups = [digits[:head]] if head else []
    groups.extend(digits[i:i + 3] for i in range(head, len(digits), 3))
    return sign + ','.join(groups)

4.3. 示例

format_num(10000)     #'10,000'
format_num(123456789) # '123,456,789'

5. 格式化时间format_time

5.1. 定义

misc_utils.misc_utils.format_time(seconds)

将秒转化为格式化的字符串

5.2. 实现

def format_time(seconds):
    """Render a duration in seconds as a short human-readable string.

    Only the most significant units are shown and every unit is floored:
    days/hours/minutes when at least one day, hours/minutes when at least
    one hour, minutes alone when at least one minute, otherwise seconds.

    Args:
        seconds(int): duration in seconds.

    Returns:
        str: the formatted duration.

    Examples
        >>> format_time(10)  # 10s
        >>> format_time(100)  # 1m
        >>> format_time(10000)  # 2h 46m
        >>> format_time(1000000)  # 11d 13h 46m

    """
    days = seconds // 86400
    hours = (seconds % 86400) // 3600
    minutes = (seconds % 3600) // 60

    if days:
        return '%dd %dh %dm' % (days, hours, minutes)
    if hours:
        return '%dh %dm' % (hours, minutes)
    if minutes:
        return '%dm' % minutes
    return '%ds' % (seconds % 60)

5.3. 示例

format_time(10)  # 10s
format_time(100)  # 1m
format_time(10000)  # 2h 46m
format_time(1000000)  # 11d 13h 46m

6. 给定概率返回True - gambling

6.1. 定义

misc_utils.misc_utils.gambling(prob, total=1.0)

随机返回True或False.

6.2. 实现

import random

def gambling(prob, total=1.0):
    """Return True with probability ``prob / total``.

    Args:
        prob(float): chance to return True, expressed relative to ``total``.
        total(float): value that corresponds to certainty, default 1.0.

    Returns:
        (bool): (randomly) True or False.

    """
    # random.random() is uniform on [0.0, 1.0), so a strict comparison
    # gives exactly the requested probability: a ratio of 0 can never win
    # (``<=`` could when the draw is exactly 0.0) and a ratio of 1 always
    # wins.
    return random.random() < prob / total

6.3. 示例

gambling(0.3)

7. 获取父目录路径get_dir_name

7.1. 定义

misc_utils.misc_utils.get_dir_name(path)

7.2. 实现

import os 

def get_dir_name(path):
    """Return the directory part of a path.

    Args
        path(str): path of a file or directory.

    Returns
        The path with its last component removed.

    Example
        >>> get_dir_name('root/train/0001.jpg')  # root/train
        >>> get_dir_name(get_dir_name('root/train/0001.jpg'))  # root

    """
    parent, _ = os.path.split(path)
    return parent

7.3. 示例

get_dir_name('root/train/0001.jpg')  # root/train
get_dir_name(get_dir_name('root/train/0001.jpg'))  # root

8. 获取文件名get_file_name

8.1. 定义

misc_utils.misc_utils.get_file_name(path)

8.2. 实现

import os

def get_file_name(path):
    """Return the final path component with its last extension removed.

    Args
        path(str): path of a file.

    Returns
        The base name of ``path`` without its extension.

    Example
        >>> get_file_name('train/0001.jpg')  # 0001

    """
    base = os.path.basename(path)
    return os.path.splitext(base)[0]

8.3. 示例

get_file_name('train/0001.jpg')  # 0001

9. 匹配文件路径get_file_paths_by_pattern

9.1. 定义

misc_utils.misc_utils.get_file_paths_by_pattern(pattern='*', folder=None)

参数：

[1] - pattern - 文件名待匹配模式

[2] - folder - 搜索路径

返回匹配路径列表.

9.2. 实现

import glob

def get_file_paths_by_pattern(pattern='*', folder=None):
    """List the paths that match a glob pattern.

    Args:
        pattern(str): glob pattern used to match files.
        folder(str): folder the pattern is joined to; when None the
            pattern is passed to ``glob.glob`` unchanged.

    Returns
        (list of str): the matching paths.

    Examples
        >>> get_file_paths_by_pattern('*.png')  # all *.png files
        >>> get_file_paths_by_pattern('*rotate*')  # files with 'rotate' in name

    """
    target = pattern if folder is None else os.path.join(folder, pattern)
    return glob.glob(target)

9.3. 示例

get_file_paths_by_pattern('*.png')  # get all *.png files in folder
get_file_paths_by_pattern('*rotate*')  # get all files with 'rotate' in name

10. 获取日志记录器get_logger

10.1. 定义

misc_utils.misc_utils.get_logger(f='log.txt', mode='w', level='info', print_stream=True)

参数：

[1] - f - 日志文件路径

[2] - mode - w 或 a

[3] - level - debug 或 info

[4] - print_stream - 是否终端打印

10.2. 实现

import logging

def get_logger(f='log.txt', mode='w', level='info', print_stream=True):
    """Get a logger that writes to a file and to the terminal.

    The logger is ``logging.getLogger(__name__)``, so every call returns
    the same object. Handlers installed by an earlier call are removed
    and closed first, so calling this function repeatedly does not
    produce duplicated log lines.

    Args:

        f(str): log file path.
        mode(str): 'w' or 'a'.
        level(str): 'debug' or 'info' (case-insensitive).
        print_stream(bool): if print to terminal or not.

    Returns:
        A logger.

    Raises:
        ValueError: if ``level`` is neither 'debug' nor 'info'.

    Example
        >>> logger = get_logger(level='debug')
        >>> logger.info("test")

    """
    presets = {
        'debug': (
            logging.DEBUG,
            "[%(levelname)s] %(asctime)s %(pathname)s, line %(lineno)d, in %(funcName)s(): '%(message)s'",
        ),
        'info': (
            logging.INFO,
            "[%(levelname)s] %(asctime)s %(message)s",
        ),
    }
    # Validate before touching the logger so a bad level leaves any
    # previously configured logger intact.
    try:
        log_level, fmt = presets[level.lower()]
    except KeyError:
        raise ValueError(
            "level must be 'debug' or 'info', got %r" % (level,)) from None

    logger = logging.getLogger(__name__)

    # Remove only the handlers this function attached earlier; handlers
    # added by other code are left alone.
    for old in list(logger.handlers):
        if getattr(old, '_from_get_logger', False):
            logger.removeHandler(old)
            old.close()

    logger.setLevel(log_level)
    formatter = logging.Formatter(fmt, datefmt='%Y-%m-%d %H:%M:%S')

    fh = logging.FileHandler(f, mode=mode)
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)

    ch = logging.StreamHandler()
    # The stream handler is always attached; raising its threshold to
    # CRITICAL is how terminal output is switched off.
    ch.setLevel(logging.DEBUG if print_stream else logging.CRITICAL)
    ch.setFormatter(formatter)

    for handler in (ch, fh):
        handler._from_get_logger = True
        logger.addHandler(handler)
    return logger

10.3. 示例

logger = get_logger(level='debug')
logger.info("test")
logger.debug("test")

11. Unix 时间戳（秒）get_time_stamp

11.1. 定义

misc_utils.misc_utils.get_time_stamp(add_offset=0)

11.2. 实现

import time 

def get_time_stamp(add_offset=0):
    """Return the current Unix time in whole seconds as a string.

    Args:
        add_offset(int): number of seconds added to the current time.

    Returns:
        (str): the shifted time stamp in seconds.
    """
    now_seconds = int(time.time())
    return str(now_seconds + add_offset)

12. 格式化时间字符串的时间戳get_time_stamp_by_format_str

12.1. 定义

misc_utils.misc_utils.get_time_stamp_by_format_str(time_str: str, fmt='%Y/%m/%d %H:%M:%S', timezone=8)

12.2. 实现

import datetime

def get_time_stamp_by_format_str(time_str: str, fmt="%Y/%m/%d %H:%M:%S", timezone=8):
    """Get the Unix timestamp of a formatted local-time string.

    Args:
        time_str(str): string in fmt format.
        fmt(str): ``strptime`` format of ``time_str``.
        timezone(int): offset in hours of ``time_str`` from UTC; it is
            subtracted to obtain the UTC time.

    Returns:
        (str): time stamp in whole seconds.

    Example:
        >>> get_time_stamp_by_format_str('2020/01/01 15:30:00')
        >>> # 1577863800

    """
    # Naive epoch constant; replaces datetime.utcfromtimestamp(0), which
    # is deprecated since Python 3.12.
    epoch = datetime.datetime(1970, 1, 1)

    parsed = datetime.datetime.strptime(time_str, fmt)
    utc_time = parsed - datetime.timedelta(hours=timezone)

    elapsed = utc_time - epoch
    # days/seconds (microseconds ignored) give the floored second count.
    return str(elapsed.days * 3600 * 24 + elapsed.seconds)

12.3. 示例

get_time_stamp_by_format_str('2022/01/11 15:30:00') #'1641886200'

13. 获取时间字符串get_time_str

13.1. 定义

misc_utils.misc_utils.get_time_str(time_stamp=None, fmt='%Y/%m/%d %H:%M:%S', timezone=8, year_length=4)

13.2. 实现

import datetime

def get_time_str(time_stamp=None, fmt="%Y/%m/%d %H:%M:%S", timezone=8, year_length=4):
    """Get formatted time string.

    Args:
        time_stamp(str): Unix time in seconds (anything ``int()`` accepts);
            when None, the value returned by ``get_time_stamp()`` is used.
        fmt(str): ``strftime`` format of the result.
        timezone(int): offset in hours added to the UTC time.
        year_length(int): 2 or 4. With 2 the first two characters of the
            formatted string are dropped, which shortens the year only
            when ``fmt`` starts with ``%Y``.

    Returns:
        (str): formatted time string.

    Example:
        >>> get_time_str('1577863800')
        >>> # 2020/01/01 15:30:00

    """
    if time_stamp is None:
        time_stamp = get_time_stamp()

    # Epoch + timedelta replaces datetime.utcfromtimestamp, which is
    # deprecated since Python 3.12; the result is the same naive datetime.
    epoch = datetime.datetime(1970, 1, 1)
    shifted = epoch + datetime.timedelta(seconds=int(time_stamp), hours=timezone)
    format_time_str = shifted.strftime(fmt)

    if year_length == 2:
        format_time_str = format_time_str[2:]
    return format_time_str

13.3. 示例

get_time_str()

14. 获取随机哈希字符串hash

14.1. 定义

misc_utils.misc_utils.hash(length=8)

返回随机 hash-like 字符串，如，a6b3c47f

14.2. 实现

def hash(length=8):
    """Return a random hash-like string such as `a6b3c47f`.

    Each character is drawn independently from the sixteen lowercase
    hexadecimal digits using the ``random`` module.

    Args:
        length(int): number of characters in the result.

    Returns:
        (str): a random lowercase hexadecimal string.

    """
    alphabet = '0123456789abcdef'
    return ''.join(alphabet[random.randint(0, 15)] for _ in range(length))

15. 判断文件是否为图片is_file_image

15.1. 定义

misc_utils.misc_utils.is_file_image(filename)

15.2. 实现

def is_file_image(filename):
    """Return if a file's extension is an image's.

    The check looks only at the file name, never at the file content.
    Hidden files (base name starting with a dot) are rejected; a leading
    ``./`` or ``../`` in the directory part does not affect the result.

    Args:
        filename(str): file path.

    Returns:
        (bool): True if the extension is jpg, png, bmp, jpeg or tiff
        (case-insensitive) and the file is not hidden.

    """
    import os  # local import keeps this snippet self-contained

    img_ex = ('jpg', 'png', 'bmp', 'jpeg', 'tiff')

    # Test the base name rather than the whole path so './a.jpg' is
    # accepted and 'dir/.a.jpg' is recognised as hidden.
    base = os.path.basename(filename)
    if base.startswith('.'):
        return False

    extension = os.path.splitext(base)[1]
    return extension[1:].lower() in img_ex

16. 计算列表均值mean

16.1. 定义

misc_utils.misc_utils.mean(data: list, prec=3)

16.2. 实现

def mean(data: list, prec=3):
    """Return the arithmetic mean of a list, rounded.

    Args:
        data(list): values to average; an empty list raises
            ZeroDivisionError.
        prec(int): number of decimal places passed to ``round``.

    Returns:
        (float) mean value.

    Example:
        >>> mean([1, 2, 3, 4])
        >>> # 2.5

    """
    total = sum(data)
    count = len(data)
    return round(total / count, prec)

17. 逐项打印列表、元组及字典p

17.1. 定义

misc_utils.misc_utils.p(obj)

17.2. 实现

def p(obj):
    """Print the items of a list, tuple or dict, one per line.

    Only the top level is expanded: nested containers are printed with
    their normal ``str`` representation. Dict entries are printed as
    ``key: value``. Any other object is printed as-is.

    Args:
        obj(list, tuple or dict): the object to print.

    """
    kind = type(obj)
    if kind in (list, tuple):
        for item in obj:
            print(item)
        return
    if kind == dict:
        for key, value in obj.items():
            print('%s: %s' % (key, value))
        return
    print(obj)

18. 打印argparse解析的参数print_args

18.1. 定义

misc_utils.misc_utils.print_args(args)

18.2. 实现

def print_args(args):
    """Print the options held by an argparse namespace, with colors.

    Each option is printed as ``name = value`` between a header and a
    footer line; names and values are wrapped in ANSI color escapes.

    Args:
        args: namespace returned by ``ArgumentParser.parse_args``.

    Example
        >>> parser = argparse.ArgumentParser()
        >>> args = parser.parse_args()
        >>> print_args(args)

    """
    name_color = ' \033[1;32m'
    value_color = "\033[0m=\033[1;33m"
    reset = '\033[0m'

    print('===========Options===========')
    for name, value in args._get_kwargs():
        print(name_color, str(name).lstrip(), value_color, value, reset)
    print('=============================')

18.3. 示例

import argparse 

parser = argparse.ArgumentParser()
args = parser.parse_args()

print_args(args)

19. 终端渲染进度条progress_bar

19.1. 定义

misc_utils.misc_utils.progress_bar(current, total, pre_msg=None, msg=None)

19.2. 实现

import time
import sys

TOTAL_BAR_LENGTH = 30
last_time = time.time()
begin_time = last_time

def progress_bar(current, total, pre_msg=None, msg=None):
    """Render a progress_bar in terminal.

    Preview
        Training...  Step: [=======>... 26/100 ...........] ETA: 0s | loss: 0.45

    The ETA is the duration of the latest step (time since the previous
    call) multiplied by the number of remaining steps. The line ends with
    a carriage return so the next call overwrites it; the last step
    (``current == total - 1`` or beyond) ends with a newline instead.

    Args:

        current(int): current counter, range in [0, total-1].
        total(int): total counts.
        pre_msg(str): message before the progress bar.
        msg(str): message after the progress bar.

    Example
        >>> for i in range(100):
        >>>     progress_bar(i, 100, 'Training...', 'loss:0.45')

    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # a new bar starts here

    filled = int(TOTAL_BAR_LENGTH*current/total)
    remaining = int(TOTAL_BAR_LENGTH - filled) - 1

    out = sys.stdout
    prefix = '' if pre_msg is None else pre_msg
    out.write(prefix + ' Step:')
    out.write(' [' + '=' * filled + '>' + '.' * remaining + ']')

    now = time.time()
    step_time = now - last_time
    last_time = now
    eta = format_time(int((total - current) * step_time))

    tail = ' ETA: %s' % eta
    if msg:
        tail += ' | ' + msg
    out.write(tail + ' ' * 3)

    # Step the cursor back into the bar so the counter is drawn over it.
    out.write('\b' * (len(tail) + int(TOTAL_BAR_LENGTH/2) + 8))
    out.write(' %d/%d ' % (current+1, total))

    out.write('\r' if current < total-1 else '\n')
    out.flush()

19.3. 示例

for i in range(100):
    progress_bar(i, 100, 'Training...', 'loss:0.45')

20. 安全获取字典键值safe_key

20.1. 定义

misc_utils.misc_utils.safe_key(dic: dict, key, default=None)

20.2. 实现

def safe_key(dic: dict, key, default=None):
    """Look up a key in a dict without risking a KeyError.

    Args:
        dic(dict): a dictionary.
        key(usually str or int): key to look up.
        default: value returned when the key is absent.

    Returns:
        dic[key] if key in dic else default.

    """
    return dic[key] if key in dic else default

21. 根据 token 划分字符串split_underline

21.1. 定义

misc_utils.misc_utils.split_underline(str, end_num, start_num=0, token='_', keep_ex=True)

21.2. 实现

import os 

def split_underline(str, end_num, start_num=0, token='_', keep_ex=True):
    """Split a file name by a token and keep a slice of the parts.

    The name is first reduced with ``get_file_name``, split on ``token``,
    and the parts ``[start_num:end_num]`` are joined back with ``token``.

    Args:
       str(str): string to handle with.
       end_num(int): end index (exclusive) of the kept parts.
       start_num(int): start index of the kept parts.
       token(str): separator used to split and to re-join.
       keep_ex(bool): whether to append the original extension.

    Example:
        >>> split_underline('abc_123_t134567_cam1.jpg', 2)
        >>> # abc_123.jpg

    """
    extension = os.path.splitext(str)[-1] if keep_ex else ''
    pieces = get_file_name(str).split(token)
    return token.join(pieces[start_num: end_num]) + extension

21.3. 示例

split_underline('abc_123_t134567_cam1.jpg', 2)

22. 转换为字符串to_string

22.1. 定义

misc_utils.misc_utils.to_string(obj, last_comma=False)

22.2. 实现

def to_string(obj, last_comma=False):
    """Render a list, tuple or dict as a single comma-separated line.

    List and tuple items are converted with ``str``; dict entries are
    rendered as ``key=value``. Any other object is returned as
    ``str(obj)``.

    Args:
        obj(list, tuple or dict): a list, tuple or dict to convert.
        last_comma(bool): also append ``', '`` after the last item.

    Returns:
        (str) string.

    Example:
        >>> to_string([1, 2, 3, 4], last_comma=True)
        >>> # '1, 2, 3, 4, '
        >>> to_string({'a': 2,'b': 4})
        >>> # 'a=2, b=4'

    """
    kind = type(obj)
    if kind in (list, tuple):
        parts = [str(item) for item in obj]
    elif kind == dict:
        parts = ['%s=%s' % (str(key), str(value)) for key, value in obj.items()]
    else:
        return str(obj)

    joined = ', '.join(parts)
    # An empty container stays an empty string even with last_comma.
    if last_comma and parts:
        joined += ', '
    return joined

22.3. 示例

to_string([1, 2, 3, 4], last_comma=True) #'1, 2, 3, 4, '
to_string({'a': 2,'b': 4}) #'a=2, b=4'

23. 字典与列表相互转换toggle_list_dict

23.1. 定义

misc_utils.misc_utils.toggle_list_dict(obj)

23.2. 实现

def toggle_list_dict(obj):
    """Convert list of dict to dict of list, and vice versa.

    The kind of conversion is chosen from the type of the first element
    (for a list) or of the first value (for a dict):

    * list of dicts   -> dict of lists, using the keys of the first dict
    * any other list  -> dict mapping index to item
    * dict of lists   -> list of dicts, sized by the first list
    * any other dict  -> list holding a tuple of keys and a tuple of values

    Empty inputs and other types are returned unchanged.

    Args:
        obj: a list or a dict.

    Returns:
        converted type of obj.

    Example:
        >>> toggle_list_dict([{'a': 3}, {'a': 5}, {'a': 7}])
        >>> # {'a': [3, 5, 7]}
        >>> toggle_list_dict({'a': [3, 5, 7]})
        >>> # [{'a': 3}, {'a': 5}, {'a': 7}]
        >>> k, v = toggle_list_dict({1: 2, 3: 4})
        >>> # k=(1, 3), v=(2, 4)

    """
    if len(obj) == 0:
        return obj

    kind = type(obj)

    if kind == list:
        if type(obj[0]) == dict:
            return {key: [row[key] for row in obj] for key in obj[0]}
        return dict(enumerate(obj))

    if kind == dict:
        first_value = obj[next(iter(obj))]
        if type(first_value) == list:
            return [{key: obj[key][idx] for key in obj}
                    for idx in range(len(first_value))]
        return list(zip(*obj.items()))

    return obj

23.3. 示例

toggle_list_dict([{'a': 3}, {'a': 5}, {'a': 7}]) # {'a': [3, 5, 7]}
toggle_list_dict({'a': [3, 5, 7]})               # [{'a': 3}, {'a': 5}, {'a': 7}]
k, v = toggle_list_dict({1: 2, 3: 4})            # k=(1, 3), v=(2, 4)

Last modification：January 11th, 2022 at 08:09 pm

暂无相关推荐