MassageRobot_Dobot/Language/ask_summarize.py
2025-05-27 15:46:31 +08:00

143 lines
6.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
# Log line fragment that marks the start of a new wake-word section.
_WAKE_MARKER = "检测到小悠小悠、小悠师傅"
# Line written after every section in the report.
_SEPARATOR = '--------------------------------'
# Label stripped from accumulated-text matches before they are joined.
_ACCUMULATED_PREFIX = "accumulated_text:"

# Compiled once at import time instead of once per log line.
_RECOGNIZE_TIME_RE = re.compile(r"(recognize_time:\d+\.\d+)")
_RECOGNIZED_TEXT_RE = re.compile(r"(Recognized text:.*?)(?=\n|$)")
_RUN_TIME_RE = re.compile(r"(运行时间:\d+\.\d+)")
_ACCUMULATED_TEXT_RE = re.compile(r"(accumulated_text:.*?)(?=\n|$)")


def _split_sections(log_lines):
    """Group log lines into sections, one per wake-marker line.

    Lines before the first marker are dropped, and the marker lines
    themselves are not included in any section.
    """
    sections = []
    current = None
    for line in log_lines:
        if _WAKE_MARKER in line:
            current = []
            sections.append(current)
        elif current is not None:
            current.append(line)
    return sections


def _summarize_section(lines):
    """Extract the recognize times, texts, run times and accumulated text."""
    recognize_times = []
    recognized_texts = []
    run_times = []
    accumulated_parts = []
    # Each line is checked against every pattern: one line may carry
    # more than one of the fields.
    collectors = (
        (_RECOGNIZE_TIME_RE, recognize_times),
        (_RECOGNIZED_TEXT_RE, recognized_texts),
        (_RUN_TIME_RE, run_times),
    )
    for line in lines:
        for pattern, bucket in collectors:
            found = pattern.search(line)
            if found:
                bucket.append(found.group(1))
        found = _ACCUMULATED_TEXT_RE.search(line)
        if found:
            # Drop the "accumulated_text:" label and surrounding whitespace.
            fragment = found.group(1).replace(_ACCUMULATED_PREFIX, "")
            accumulated_parts.append(fragment.strip())
    return {
        "recognize_times": recognize_times,
        "recognized_texts": recognized_texts,
        "run_times": run_times,
        # Fragments are space-separated; strip() removes the padding
        # left behind when the first or last fragment is empty.
        "accumulated_text": " ".join(accumulated_parts).strip(),
    }


def _format_section(section):
    """Render one summarized section, including its trailing separator."""
    pieces = ["Recognize Time:\n"]
    pieces.extend(entry + '\n' for entry in section["recognize_times"])
    pieces.append("\nRecognized Text:\n")
    pieces.extend(entry + '\n' for entry in section["recognized_texts"])
    pieces.append("\nModel Time:\n")
    pieces.extend(entry + '\n' for entry in section["run_times"])
    pieces.append("\nAccumulated Text:\n")
    pieces.append(section["accumulated_text"] + '\n')
    pieces.append("\n")
    pieces.append(_SEPARATOR + '\n')
    return "".join(pieces)


def extract_data_from_log(log_file_path, output_file_path):
    """Summarize a speech log into a per-wake-word text report.

    The log is split into sections, each starting right after a line that
    contains the wake marker and ending before the next such line (or at
    the end of the file). For every section the report lists the
    ``recognize_time:`` values, the ``Recognized text:`` entries, the
    ``运行时间:`` (model run time) values and the space-joined
    ``accumulated_text:`` fragments, followed by a separator line.

    Both files are opened as UTF-8 explicitly, because the marker and the
    log content contain Chinese text and the platform default encoding is
    not guaranteed to be UTF-8.

    Args:
        log_file_path: Path of the log file to read.
        output_file_path: Path of the report to write (overwritten).

    Returns:
        None. The report is written to ``output_file_path`` and a
        confirmation message is printed.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the log is not valid UTF-8.
    """
    # The log is fully read and summarized before the output file is opened.
    with open(log_file_path, 'r', encoding='utf-8') as log_file:
        summaries = [
            _summarize_section(section_lines)
            for section_lines in _split_sections(log_file)
        ]

    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.writelines(_format_section(item) for item in summaries)

    print(f"提取的数据已保存到 {output_file_path}")
if __name__ == "__main__":
    # Script entry point: summarize one recorded log into a text report.
    # Point source_log at the actual log file before running.
    source_log = 'Language_2025-01-03_165625.log'
    report_path = 'extracted_data.txt'
    extract_data_from_log(source_log, report_path)