2018-08-08
1.frm.unique()
for i in list(set(html_df['table_id'].values)):
for i in html_df['table_id'].unique():
2.正则表达式跨行搜索
with open(py_file_path, 'r', encoding='utf-8') as f:
file_codeline_list = f.readlines()
for line in file_codeline_list: # todo: 可以简化
if re.match(self.re_author, line):
author = re.match(self.re_author, line).group(1)
author = author.lstrip().rstrip()
author = author.replace(self.DOUBLE_QUOTES, '').replace(self.SINGLE_QUOTES, '')
return author
return self.UNKNOW_AUTHOR
re_author = r'\s*_author\s*=\s*"(\S+)"'
assert isinstance(py_file_path, str)
with open(py_file_path, 'r', encoding='utf-8') as f:
all_text = f.read()
res = re.search(re_author, all_text)
if res:
author = all_text[res.regs[1][0]: res.regs[1][1]]
return author
return self.UNKNOW_AUTHOR
3.迭代表达式
with open(py_file_path, 'r', encoding='utf-8') as f:
file_lines = f.readlines()
striped_lines = []
for line in file_lines:
line = line.lstrip() # 去除缩进
if line != '': # 跳过空白行
striped_lines.append(line)
return striped_lines
with open(py_file_path, 'r', encoding='utf-8') as f:
file_lines = f.readlines()
striped_lines = [line.lstrip() for line in file_lines if line.strip()]
4.提取函数
事不过三,三则重构
assert isinstance(author, str)
assert isinstance(code_frm, pd.DataFrame)
assert isinstance(title_num, str)
from helper import assert_isinstance # ylib里面提供易用的函数
assert_isinstance([author,code_frm,title_num ],[str, pd.DataFrame, str])
5.**Series传递函数参数
def row_td_head(self, i, frm, td_head_list):
upon_td_align = frm.loc[i, 'upon_td_align']
upon_td_index = frm.loc[i, 'upon_td_index']
row_align = frm.loc[i, 'row_align']
col_index = frm.loc[i,'col_index']
string = frm.loc[i, 'string']
# 省略了N行代码
return td_head
def row_td_head(self, i, frm, td_head_list):
def the_func(upon_td_align,upon_td_index,row_align,col_index,string):
# 省略了N行代码
return td_head
return the_func(**frm.loc[i, ['upon_td_align','upon_td_index','row_align','col_index', 'string']])
cc:利用第1到5点改下代码。hj:看下第3到4点。