周二:早上吃了Joyce给我的homemade 面包,配了三顿半咖啡!提神醒脑!早上完成了一些chores,包括交学费,和ta financial info update…
中午和仙女lu 仙女xi吃太古,我吃了好多胡萝卜和洋葱[awsl](她们都不吃)
下午和大星上课,互相交换了各种购物商品,吃穿用度(甚至包括脱毛膏…)小女生的感觉回来了哈哈哈哈哈哈
enfp小狗每天都发很多消息[闭嘴](我俩现在小红书经常互cue,尤其在那种求脱单的帖子下面哈哈哈哈哈哈啊[喵喵][awsl][互粉])
#上课上课##读博日记##plog##好好吃饭#
中午和仙女lu 仙女xi吃太古,我吃了好多胡萝卜和洋葱[awsl](她们都不吃)
下午和大星上课,互相交换了各种购物商品,吃穿用度(甚至包括脱毛膏…)小女生的感觉回来了哈哈哈哈哈哈
enfp小狗每天都发很多消息[闭嘴](我俩现在小红书经常互cue,尤其在那种求脱单的帖子下面哈哈哈哈哈哈啊[喵喵][awsl][互粉])
#上课上课##读博日记##plog##好好吃饭#
【info】去年与Katie Crutchfield组建了组合Plains的美国创作女歌手Jess Williamson将于6月9日发行最新个人专辑Time Ain’t Accidental,预购:https://t.cn/A6CkekdX
Tracklist:
01 Time Ain’t Accidental
02 Hunter
03 Chasing Spirits
04 Tobacco Two Step
05 God in Everything
06 A Few Seasons
07 Topanga Two Step
08 Something’s in the Way
09 Stampede
10 I’d Come to Your Call
11 Roads
Tracklist:
01 Time Ain’t Accidental
02 Hunter
03 Chasing Spirits
04 Tobacco Two Step
05 God in Everything
06 A Few Seasons
07 Topanga Two Step
08 Something’s in the Way
09 Stampede
10 I’d Come to Your Call
11 Roads
### Third code snippet from the Transn internship
def detect_contains_term(input_src, input_target):
    """Look up glossary rows whose source text contains ``input_src``.

    Reads the module-level ``dot_term_info`` table (columns ``src_text``,
    ``target_text`` and ``term_id`` are accessed) and scores candidates with
    the module-level ``cal_distance`` helper.

    Args:
        input_src: Source-language term; matched as a literal substring of
            ``src_text``.
        input_target: Candidate translation compared against each matching
            row's ``target_text``.

    Returns:
        A dict with the keys ``term_input_result``, ``term_id``,
        ``term_tran_text`` and ``term_ratio``. When nothing matches, the last
        three are empty strings. Otherwise ``term_id`` is the list of ids of
        the rows sharing the best-scoring translation, ``term_tran_text`` is
        that translation, and ``term_ratio`` is the mean of the best
        translation score and the average source-text score.
    """
    # regex=False: the term is a plain substring, not a pattern, so inputs
    # containing regex metacharacters (e.g. "C++", "(") neither raise nor
    # mis-match.  na=False: rows with a missing src_text count as "no match"
    # instead of producing a NaN mask that boolean indexing rejects.
    contains_mask = dot_term_info['src_text'].str.contains(
        input_src, regex=False, na=False
    )
    matches = dot_term_info[contains_mask]

    if len(matches) == 0:
        # No row contains the term.
        return {
            'term_input_result': '不存在相似的术语',
            'term_id': '',
            'term_tran_text': '',
            'term_ratio': '',
        }

    detect_result = {'term_input_result': '存在相似术语'}
    matches = matches.drop_duplicates()

    # Score every candidate translation against the supplied one.
    # NOTE(review): the maximum is selected, so cal_distance presumably
    # returns a similarity (higher = closer) despite its name -- confirm.
    target_texts = matches['target_text'].to_numpy()
    target_scores = [
        cal_distance(target_text, input_target) for target_text in target_texts
    ]
    best_score = max(target_scores)
    # list.index returns the first occurrence, so ties keep the earliest row.
    best_target = target_texts[target_scores.index(best_score)]

    # Several rows may share the winning translation.
    best_rows = matches[matches['target_text'] == best_target]
    best_sources = best_rows['src_text'].tolist()

    detect_result['term_id'] = best_rows['term_id'].tolist()
    detect_result['term_tran_text'] = best_target

    # Average the source-side score over every row with that translation.
    source_score = sum(
        cal_distance(input_src, src_text) for src_text in best_sources
    ) / len(best_sources)

    # Final ratio: mean of the translation-side and source-side scores.
    detect_result['term_ratio'] = (best_score + source_score) / 2
    return detect_result
def detect_contains_term(input_src, input_target):
    """Look up glossary rows whose source text contains ``input_src``.

    Reads the module-level ``dot_term_info`` table (columns ``src_text``,
    ``target_text`` and ``term_id`` are accessed) and scores candidates with
    the module-level ``cal_distance`` helper.

    Args:
        input_src: Source-language term; matched as a literal substring of
            ``src_text``.
        input_target: Candidate translation compared against each matching
            row's ``target_text``.

    Returns:
        A dict with the keys ``term_input_result``, ``term_id``,
        ``term_tran_text`` and ``term_ratio``. When nothing matches, the last
        three are empty strings. Otherwise ``term_id`` is the list of ids of
        the rows sharing the best-scoring translation, ``term_tran_text`` is
        that translation, and ``term_ratio`` is the mean of the best
        translation score and the average source-text score.
    """
    # regex=False: the term is a plain substring, not a pattern, so inputs
    # containing regex metacharacters (e.g. "C++", "(") neither raise nor
    # mis-match.  na=False: rows with a missing src_text count as "no match"
    # instead of producing a NaN mask that boolean indexing rejects.
    contains_mask = dot_term_info['src_text'].str.contains(
        input_src, regex=False, na=False
    )
    matches = dot_term_info[contains_mask]

    if len(matches) == 0:
        # No row contains the term.
        return {
            'term_input_result': '不存在相似的术语',
            'term_id': '',
            'term_tran_text': '',
            'term_ratio': '',
        }

    detect_result = {'term_input_result': '存在相似术语'}
    matches = matches.drop_duplicates()

    # Score every candidate translation against the supplied one.
    # NOTE(review): the maximum is selected, so cal_distance presumably
    # returns a similarity (higher = closer) despite its name -- confirm.
    target_texts = matches['target_text'].to_numpy()
    target_scores = [
        cal_distance(target_text, input_target) for target_text in target_texts
    ]
    best_score = max(target_scores)
    # list.index returns the first occurrence, so ties keep the earliest row.
    best_target = target_texts[target_scores.index(best_score)]

    # Several rows may share the winning translation.
    best_rows = matches[matches['target_text'] == best_target]
    best_sources = best_rows['src_text'].tolist()

    detect_result['term_id'] = best_rows['term_id'].tolist()
    detect_result['term_tran_text'] = best_target

    # Average the source-side score over every row with that translation.
    source_score = sum(
        cal_distance(input_src, src_text) for src_text in best_sources
    ) / len(best_sources)

    # Final ratio: mean of the translation-side and source-side scores.
    detect_result['term_ratio'] = (best_score + source_score) / 2
    return detect_result
✋热门推荐