Github - misads/misc_utils
安装:
pip install utils-misc
实现的常用函数有:
import misc_utils as utils
1. 执行shell 命令cmd
1.1. 定义
misc_utils.misc_utils.cmd(shell)
1.2. 实现
import os
def cmd(shell):
    """Run a shell command and return its standard output as a list of lines.

    Args:
        shell(str): command line passed to ``os.popen`` (executed by the
            system shell, so never pass untrusted input).

    Returns:
        list[str]: one entry per output line, with the trailing newline
        removed.

    Example
        >>> cmd('echo hello')  # ['hello']
    """
    # The context manager closes the pipe (and waits for the child process)
    # deterministically instead of relying on garbage collection.
    with os.popen(shell) as pipe:
        return [line.rstrip('\n') for line in pipe]
1.3. 示例
utils.cmd('ls')
utils.cmd('nvidia-smi')
2. 打印自定义颜色的文本color_print
2.1. 定义
misc_utils.misc_utils.color_print(text='', color=0, end='\n')
参数:
[1] - text
- 待打印文本, str
[2] - color
- 颜色,int,候选项:[0 black; 1 red; 2 green; 3 yellow; 4 blue; 5 magenta; 6 cyan; 7 white]
[3] - end
- 待颜色文本后的结束符, str
2.2. 实现
def color_print(text='', color=0, end='\n'):
    """Print text wrapped in ANSI escape codes for a bold foreground color.

    The escape sequence emitted is ``ESC[1;3<color>m``, so ``color`` selects
    one of the eight standard ANSI foreground colors.

    Args:
        text(str): text to print.
        color(int): ANSI color index:
            * 0 black
            * 1 red
            * 2 green
            * 3 yellow
            * 4 blue
            * 5 magenta
            * 6 cyan
            * 7 white
        end(str): string written after the color reset sequence.

    Example
        >>> color_print('yellow', 3)
    """
    start_code = '\033[1;3%dm' % color
    reset_code = '\033[0m'
    # One print call with an empty separator emits the same characters as
    # printing the three pieces one after another.
    print(start_code, text, reset_code, sep='', end=end)
2.3. 示例
color_print('yellow', 3)
color_print('yellow', 2)
color_print('test', 0)
3. 加载文本文件并解析内容为字符串列表file_lines
3.1. 定义
misc_utils.misc_utils.file_lines(filename, prefix='', offset=0, max_num=0)
参数:
[1] - filename
- 文件名
[2] - prefix
- 每一项前插入的前缀
[3] - offset
- 行的偏移量
[4] - max_num
- 读取的最大行数,0或负数表示不限制.
3.2. 实现
def file_lines(filename, prefix='', offset=0, max_num=0):
    """Read a text file into a list of strings, one per line.

    Args:
        filename (str): path of the file to read.
        prefix (str): string prepended to every returned item.
        offset (int): number of leading lines to skip.
        max_num (int): maximum number of lines to return; zero or a
            negative value means no limit.

    Returns:
        list[str]: the selected lines with the trailing newline removed
        and ``prefix`` prepended.
    """
    collected = []
    with open(filename, 'r') as handle:
        # Discard the first `offset` lines.
        for _ in range(offset):
            handle.readline()
        for raw in handle:
            # The limit applies only when max_num is positive.
            if 0 < max_num <= len(collected):
                break
            collected.append(prefix + raw.rstrip('\n'))
    return collected
4. 格式化数字format_num
4.1. 定义
misc_utils.misc_utils.format_num(num: int) → str
每三位添加一个逗号,返回字符串
4.2. 实现
def format_num(num: int) -> str:
    """Insert a comma between every group of three digits.

    Args:
        num(int): an integer (a string of digits is also accepted).

    Returns:
        str: the number with thousands separators. A leading sign is kept
        in front of the first group.

    Examples
        >>> format_num(10000)  # '10,000'
        >>> format_num(123456789)  # '123,456,789'
        >>> format_num(-123456)  # '-123,456'
    """
    digits = str(num)
    sign = ''
    # Split off the sign so it is never treated as a digit group
    # (the previous implementation returned '-,123,456' for -123456).
    if digits[:1] in ('-', '+'):
        sign, digits = digits[0], digits[1:]
    # The leftmost group holds the 1-2 digits left over after grouping by 3.
    head = len(digits) % 3
    groups = [digits[:head]] if head else []
    groups.extend(digits[i:i + 3] for i in range(head, len(digits), 3))
    return sign + ','.join(groups)
4.3. 示例
format_num(10000) #'10,000'
format_num(123456789) # '123,456,789'
5. 格式化时间format_time
5.1. 定义
misc_utils.misc_utils.format_time(seconds)
将秒转化为格式化的字符串
5.2. 实现
def format_time(seconds):
    """Convert a number of seconds into a short human-readable string.

    Only the coarsest units are shown: days/hours/minutes when at least one
    day, hours/minutes when at least one hour, minutes alone when at least
    one minute, otherwise seconds.

    Args:
        seconds(int): duration in seconds.

    Returns:
        str: formatted duration.

    Examples
        >>> format_time(10)  # 10s
        >>> format_time(100)  # 1m
        >>> format_time(10000)  # 2h 46m
        >>> format_time(1000000)  # 11d 13h 46m
    """
    days = seconds // 86400
    hours = (seconds % 86400) // 3600
    minutes = (seconds % 3600) // 60

    if days:
        return '%dd %dh %dm' % (days, hours, minutes)
    if hours:
        return '%dh %dm' % (hours, minutes)
    if minutes:
        return '%dm' % minutes
    return '%ds' % (seconds % 60)
5.3. 示例
format_time(10) # 10s
format_time(100) # 1m
format_time(10000) # 2h 46m
format_time(1000000) # 11d 13h 46m
6. 给定概率返回True - gambling
6.1. 定义
misc_utils.misc_utils.gambling(prob, total=1.0)
随机返回True或False.
6.2. 实现
import random
def gambling(prob, total=1.0):
    """Return True with the given probability.

    Args:
        prob(float): chance of returning True, expressed relative to
            ``total``.
        total(float): value that ``prob`` is divided by, default 1.0.

    Returns:
        (bool): True when a draw from ``random.random()`` is less than or
        equal to ``prob / total``, otherwise False.
    """
    threshold = prob / total
    return random.random() <= threshold
6.3. 示例
gambling(0.3)
7. 获取父目录路径get_dir_name
7.1. 定义
misc_utils.misc_utils.get_dir_name(path)
7.2. 实现
import os
def get_dir_name(path):
    """Return the directory part of a path (everything before the last
    component).

    Args
        path(str): a file or directory path.

    Returns
        The parent directory of ``path``; an empty string when ``path``
        has no directory component.

    Example
        >>> get_dir_name('root/train/0001.jpg')  # root/train
        >>> get_dir_name(get_dir_name('root/train/0001.jpg'))  # root
    """
    parent, _last = os.path.split(path)
    return parent
7.3. 示例
get_dir_name('root/train/0001.jpg') # root/train
get_dir_name(get_dir_name('root/train/0001.jpg')) # root
8. 获取文件名get_file_name
8.1. 定义
misc_utils.misc_utils.get_file_name(path)
8.2. 实现
import os
def get_file_name(path):
    """Return the file name of a path with directory and extension removed.

    Args
        path(str): a file path.

    Returns
        The last path component without its final extension.

    Example
        >>> get_file_name('train/0001.jpg')  # 0001
    """
    base = os.path.basename(path)
    return os.path.splitext(base)[0]
8.3. 示例
get_file_name('train/0001.jpg') # 0001
9. 匹配文件路径get_file_paths_by_pattern
9.1. 定义
misc_utils.misc_utils.get_file_paths_by_pattern(pattern='*', folder=None)
参数:
[1] - pattern
- 文件名待匹配模式
[2] - folder
- 搜索路径
返回匹配路径列表.
9.2. 实现
import glob
def get_file_paths_by_pattern(pattern='*', folder=None):
    """List the paths that match a glob pattern.

    Args:
        pattern(str): glob pattern to match.
        folder(str): directory to search; when None the pattern is passed
            to ``glob.glob`` unchanged.

    Returns
        (list of str): matching paths as returned by ``glob.glob``.

    Examples
        >>> get_file_paths_by_pattern('*.png')  # all *.png files
        >>> get_file_paths_by_pattern('*rotate*')  # names containing 'rotate'
    """
    query = pattern if folder is None else os.path.join(folder, pattern)
    return glob.glob(query)
9.3. 示例
get_file_paths_by_pattern('*.png') # get all *.png files in folder
get_file_paths_by_pattern('*rotate*') # get all files with 'rotate' in name
10. 获取日志记录器get_logger
10.1. 定义
misc_utils.misc_utils.get_logger(f='log.txt', mode='w', level='info', print_stream=True)
参数:
[1] - f
- 日志文件路径
[2] - mode
- w
或 a
[3] - level
- debug
或 info
[4] - print_stream
- 是否终端打印
10.2. 实现
import logging


def get_logger(f='log.txt', mode='w', level='info', print_stream=True):
    """Create (or reconfigure) this module's logger with a file and a stream handler.

    The logger is the one named after this module, so every call returns the
    same logger object. Handlers installed by an earlier call are removed and
    closed first; otherwise each call would stack another pair of handlers
    and every record would be emitted several times.

    Args:
        f(str): log file path.
        mode(str): file open mode, 'w' or 'a'.
        level(str): 'debug' or 'info' (case-insensitive). 'debug' also
            selects a more verbose record format (path, line, function).
        print_stream(bool): if False, the stream handler only emits
            CRITICAL records.

    Returns:
        A configured ``logging.Logger``.

    Raises:
        ValueError: if ``level`` is neither 'debug' nor 'info'.

    Example
        >>> logger = get_logger(level='debug')
        >>> logger.info("test")
    """
    # Validate the level before touching any handler or opening the file.
    level_name = level.lower()
    if level_name == 'debug':
        log_level = logging.DEBUG
        record_format = ("[%(levelname)s] %(asctime)s %(pathname)s, line %(lineno)d, "
                         "in %(funcName)s(): '%(message)s'")
    elif level_name == 'info':
        log_level = logging.INFO
        record_format = "[%(levelname)s] %(asctime)s %(message)s"
    else:
        raise ValueError("level must be 'debug' or 'info', got %r" % (level,))
    formatter = logging.Formatter(record_format, datefmt='%Y-%m-%d %H:%M:%S')

    logger = logging.getLogger(__name__)
    logger.setLevel(log_level)

    # Drop handlers left over from a previous call to avoid duplicated output
    # and leaked file descriptors.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    fh = logging.FileHandler(f, mode=mode)
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)

    ch = logging.StreamHandler()
    # With print_stream disabled the stream handler still exists but only
    # lets CRITICAL records through.
    ch.setLevel(logging.DEBUG if print_stream else logging.CRITICAL)
    ch.setFormatter(formatter)

    logger.addHandler(ch)
    logger.addHandler(fh)
    return logger
10.3. 示例
logger = get_logger(level='debug')
logger.info("test")
logger.debug("test")
11. Unix 时间戳(秒)get_time_stamp
11.1. 定义
misc_utils.misc_utils.get_time_stamp(add_offset=0)
11.2. 实现
import time
def get_time_stamp(add_offset=0):
    """Return the current Unix timestamp in whole seconds, as a string.

    Args:
        add_offset(int): value added to the timestamp before conversion.

    Returns:
        (str): ``int(time.time()) + add_offset`` rendered as a string.
    """
    now_seconds = int(time.time())
    return str(now_seconds + add_offset)
12. 格式化时间字符串的时间戳get_time_stamp_by_format_str
12.1. 定义
misc_utils.misc_utils.get_time_stamp_by_format_str(time_str: str, fmt='%Y/%m/%d %H:%M:%S', timezone=8)
12.2. 实现
import datetime
def get_time_stamp_by_format_str(time_str: str, fmt="%Y/%m/%d %H:%M:%S", timezone=8):
    """Convert a formatted local-time string into a Unix timestamp.

    Args:
        time_str(str): time string matching ``fmt``.
        fmt(str): ``strptime`` format of ``time_str``.
        timezone(int): UTC offset, in hours, of the time expressed by
            ``time_str``; it is subtracted to obtain UTC.

    Returns:
        (str): timestamp in whole seconds.

    Example:
        >>> get_time_stamp_by_format_str('2020/01/01 15:30:00')
        >>> # 1577863800
    """
    # Naive datetime for the Unix epoch. Built explicitly because
    # datetime.utcfromtimestamp() is deprecated since Python 3.12.
    epoch = datetime.datetime(1970, 1, 1)
    local_time = datetime.datetime.strptime(time_str, fmt)
    utc_time = local_time - datetime.timedelta(hours=timezone)
    elapsed = utc_time - epoch
    # days/seconds are combined by hand so sub-second precision is dropped.
    return str(elapsed.days * 3600 * 24 + elapsed.seconds)
12.3. 示例
get_time_stamp_by_format_str('2022/01/11 15:30:00') #'1641886200'
13. 获取时间字符串get_time_str
13.1. 定义
misc_utils.misc_utils.get_time_str(time_stamp=None, fmt='%Y/%m/%d %H:%M:%S', timezone=8, year_length=4)
13.2. 实现
import datetime
def get_time_str(time_stamp=None, fmt="%Y/%m/%d %H:%M:%S", timezone=8, year_length=4):
    """Format a Unix timestamp as a time string in the given time zone.

    Args:
        time_stamp(str): Unix timestamp in seconds (anything ``int()``
            accepts); when None, ``get_time_stamp()`` supplies the current
            time.
        fmt(str): ``strftime`` format.
        timezone(int): UTC offset in hours added to the UTC time.
        year_length(int): 2 or 4. With 2 the first two characters of the
            formatted string are dropped, which shortens the year only when
            ``fmt`` starts with ``%Y``.

    Returns:
        (str): formatted time string.

    Example:
        >>> get_time_str('1577863800')
        >>> # 2020/01/01 15:30:00
    """
    if time_stamp is None:
        time_stamp = get_time_stamp()
    seconds = int(time_stamp)
    # Epoch + offset arithmetic on naive datetimes; this avoids
    # datetime.utcfromtimestamp(), which is deprecated since Python 3.12.
    epoch = datetime.datetime(1970, 1, 1)
    zone_time = epoch + datetime.timedelta(seconds=seconds, hours=timezone)
    formatted = zone_time.strftime(fmt)
    if year_length == 2:
        formatted = formatted[2:]
    return formatted
13.3. 示例
get_time_str()
14. 获取随机哈希字符串hash
14.1. 定义
misc_utils.misc_utils.hash(length=8)
返回随机 hash-like 字符串,如,a6b3c47f
14.2. 实现
def hash(length=8):
    """Return a random string of lowercase hexadecimal characters, such as
    `a6b3c47f`.

    Characters are drawn with the ``random`` module, so the result is not
    derived from any input.

    Args:
        length(int): number of characters to generate.

    Returns:
        (str): a random hash-like string of ``length`` characters.
    """
    alphabet = '0123456789abcdef'
    return ''.join(alphabet[random.randint(0, 15)] for _ in range(length))
15. 判断文件是否为图片is_file_image
15.1. 定义
misc_utils.misc_utils.is_file_image(filename)
15.2. 实现
def is_file_image(filename):
    """Tell whether a file name carries a known image extension.

    The text after the last '.' is compared case-insensitively against
    jpg, png, bmp, jpeg and tiff. Names that begin with '.' are rejected.

    Args:
        filename(str): file path.

    Returns:
        (bool): True if the extension is one of the image extensions.
    """
    image_extensions = ('jpg', 'png', 'bmp', 'jpeg', 'tiff')
    if filename.startswith('.'):
        return False
    _stem, dot, extension = filename.rpartition('.')
    # `dot` is empty when the name contains no '.' at all.
    return bool(dot) and extension.lower() in image_extensions
16. 计算列表均值mean
16.1. 定义
misc_utils.misc_utils.mean(data: list, prec=3)
16.2. 实现
def mean(data: list, prec=3):
    """Compute the arithmetic mean of a list, rounded to ``prec`` digits.

    Args:
        data(list): numbers to average; must not be empty (an empty list
            raises ZeroDivisionError).
        prec(int): number of decimal places passed to ``round``.

    Returns:
        (float) the rounded mean value.

    Example:
        >>> mean([1, 2, 3, 4])
        >>> # 2.5
    """
    total = sum(data)
    count = len(data)
    return round(total / count, prec)
17. 逐项打印列表、元组及字典p
17.1. 定义
misc_utils.misc_utils.p(obj)
17.2. 实现
def p(obj):
    """Print the items of a list, tuple or dict, one per line.

    Lists and tuples are printed element by element, dicts as
    ``key: value`` lines; any other object is printed as is. Only the top
    level is expanded - nested containers are printed with their normal
    representation.

    Args:
        obj(list, tuple or dict): the object to print.
    """
    kind = type(obj)
    if kind in (list, tuple):
        for item in obj:
            print(item)
        return
    if kind == dict:
        for key, value in obj.items():
            print('%s: %s' % (key, value))
        return
    print(obj)
18. 打印argparse解析的参数print_args
18.1. 定义
misc_utils.misc_utils.print_args(args)
18.2. 实现
def print_args(args):
    """Print every option of an argparse result as a colored ``name = value``
    line, framed by a header and a footer.

    Args:
        args: object returned by ``ArgumentParser.parse_args()``; its
            ``_get_kwargs()`` method supplies the (name, value) pairs.

    Example
        >>> parser = argparse.ArgumentParser()
        >>> args = parser.parse_args()
        >>> print_args(args)
    """
    print('===========Options===========')
    for name, value in args._get_kwargs():
        # Pieces are joined with single spaces, matching print()'s default
        # separator; names are shown in green, values in yellow.
        pieces = (' \033[1;32m', str(name).lstrip(), "\033[0m=\033[1;33m", str(value), '\033[0m')
        print(' '.join(pieces))
    print('=============================')
18.3. 示例
import argparse
parser = argparse.ArgumentParser()
args = parser.parse_args()
print_args(args)
19. 终端渲染进度条progress_bar
19.1. 定义
misc_utils.misc_utils.progress_bar(current, total, pre_msg=None, msg=None)
19.2. 实现
import time
import sys
# Width of the bar in characters, and module-level timing state shared
# between successive progress_bar() calls.
TOTAL_BAR_LENGTH = 30
last_time = time.time()
begin_time = last_time


def progress_bar(current, total, pre_msg=None, msg=None):
    """Render a progress bar on standard output.

    Preview
        Training... Step: [=======>... 26/100 ...........] ETA: 0s | loss: 0.45

    The ETA is estimated as the time elapsed since the previous call
    multiplied by the number of remaining steps. The line ends with a
    carriage return so the next call overwrites it, except on the last
    step (``current == total - 1``), where a newline is written.

    Args:
        current(int): current counter, range in [0, total-1].
        total(int): total counts.
        pre_msg(str): message before the progress bar.
        msg(str): message after the progress bar.

    Example
        >>> for i in range(100):
        >>>     progress_bar(i, 100, 'Training...', 'loss:0.45')
    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # A new bar starts: reset its start time.

    filled = int(TOTAL_BAR_LENGTH * current / total)
    remaining = int(TOTAL_BAR_LENGTH - filled) - 1

    write = sys.stdout.write
    write(('' if pre_msg is None else pre_msg) + ' Step:')
    write(' [')
    write('=' * filled)
    write('>')
    write('.' * remaining)
    write(']')

    now = time.time()
    step_seconds = now - last_time
    last_time = now
    eta = format_time(int((total - current) * step_seconds))

    suffix = ' ETA: %s' % eta
    if msg:
        suffix += ' | ' + msg
    write(suffix)
    write(' ' * 3)

    # Move the cursor back to the middle of the bar and print the counter.
    write('\b' * (len(suffix) + int(TOTAL_BAR_LENGTH / 2) + 8))
    write(' %d/%d ' % (current + 1, total))

    write('\r' if current < total - 1 else '\n')
    sys.stdout.flush()
19.3. 示例
for i in range(100):
progress_bar(i, 100, 'Training...', 'loss:0.45')
20. 安全读取字典键值safe_key
20.1. 定义
misc_utils.misc_utils.safe_key(dic: dict, key, default=None)
20.2. 实现
def safe_key(dic: dict, key, default=None):
    """Look up a key without risking a KeyError.

    Args:
        dic(dict): a dictionary.
        key(usually str or int): key to look up.
        default: value returned when the key is absent.

    Returns:
        dic[key] if key in dic else default.
    """
    return dic[key] if key in dic else default
21. 根据 token 划分字符串split_underline
21.1. 定义
misc_utils.misc_utils.split_underline(str, end_num, start_num=0, token='_', keep_ex=True)
21.2. 实现
import os
def split_underline(str, end_num, start_num=0, token='_', keep_ex=True):
    """Split a file name by a token and keep a slice of the parts.

    The name is reduced with ``get_file_name`` before splitting; the parts
    ``[start_num:end_num]`` are then joined with the token again.

    Args:
        str(str): string (file name or path) to process.
        end_num(int): end index (exclusive) of the kept parts.
        start_num(int): start index of the kept parts.
        token(str): separator used for splitting and re-joining.
        keep_ex(bool): whether to append the original extension.

    Example:
        >>> split_underline('abc_123_t134567_cam1.jpg', 2)
        >>> # abc_123.jpg
    """
    extension = os.path.splitext(str)[-1] if keep_ex else ''
    parts = get_file_name(str).split(token)
    return token.join(parts[start_num:end_num]) + extension
21.3. 示例
split_underline('abc_123_t134567_cam1.jpg', 2)
22. 转换为字符串to_string
22.1. 定义
misc_utils.misc_utils.to_string(obj, last_comma=False)
22.2. 实现
def to_string(obj, last_comma=False):
    """Render a list, tuple or dict as a single comma-separated line.

    Lists and tuples become ``a, b, c``; dicts become ``k1=v1, k2=v2``.
    Any other object is converted with ``str``.

    Args:
        obj(list, tuple or dict): the object to convert.
        last_comma(bool): also append ', ' after the last item (nothing
            is appended for an empty container).

    Returns:
        (str) the one-line string.

    Example:
        >>> to_string([1, 2, 3, 4], last_comma=True)
        >>> # '1, 2, 3, 4, '
        >>> to_string({'a': 2,'b': 4})
        >>> # 'a=2, b=4'
    """
    kind = type(obj)
    if kind == list or kind == tuple:
        parts = [str(item) for item in obj]
    elif kind == dict:
        parts = ['%s=%s' % (str(key), str(value)) for key, value in obj.items()]
    else:
        return str(obj)

    joined = ', '.join(parts)
    if last_comma and parts:
        joined += ', '
    return joined
22.3. 示例
to_string([1, 2, 3, 4], last_comma=True) #'1, 2, 3, 4, '
to_string({'a': 2,'b': 4}) #'a=2, b=4'
23. 字典与列表相互转换toggle_list_dict
23.1. 定义
misc_utils.misc_utils.toggle_list_dict(obj)
23.2. 实现
def toggle_list_dict(obj):
    """Convert between list-shaped and dict-shaped data.

    Conversions, chosen from the type of ``obj`` and of its first element
    or value:
        * list of dicts -> dict of lists (keys taken from the first dict)
        * other list -> dict mapping index to item
        * dict whose first value is a list -> list of dicts
        * other dict -> list of two tuples: (keys, values)
    Empty containers and any other sized object are returned unchanged.

    Args:
        obj: a list or a dict.

    Returns:
        converted type of obj.

    Example:
        >>> toggle_list_dict([{'a': 3}, {'a': 5}, {'a': 7}])
        >>> # {'a': [3, 5, 7]}
        >>> toggle_list_dict({'a': [3, 5, 7]})
        >>> # [{'a': 3}, {'a': 5}, {'a': 7}]
        >>> k, v = toggle_list_dict({1: 2, 3: 4})
        >>> # k=(1, 3), v=(2, 4)
    """
    if len(obj) == 0:
        return obj

    kind = type(obj)
    if kind == list:
        head = obj[0]
        if type(head) == dict:
            # Gather each key's values across all rows.
            return {key: [row[key] for row in obj] for key in head.keys()}
        return dict(enumerate(obj))

    if kind == dict:
        first_value = next(iter(obj.values()))
        if type(first_value) == list:
            # One output dict per position in the first value list.
            return [{key: column[idx] for key, column in obj.items()}
                    for idx in range(len(first_value))]
        return list(zip(*obj.items()))

    return obj
23.3. 示例
toggle_list_dict([{'a': 3}, {'a': 5}, {'a': 7}]) # {'a': [3, 5, 7]}
toggle_list_dict({'a': [3, 5, 7]}) # [{'a': 3}, {'a': 5}, {'a': 7}]
k, v = toggle_list_dict({1: 2, 3: 4}) # k=(1, 3), v=(2, 4)