面试题：Python批量处理不同类型文件内容

import os
import csv
import json


def count_words_in_txt(file_path):
    """Count how often each whitespace-separated token occurs in a text file.

    The file is read as UTF-8, one line at a time. A missing file or a
    decoding failure is reported on stdout rather than raised; whatever was
    tallied before the failure (possibly nothing) is still returned.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping each token to its number of occurrences, in order of
        first appearance.
    """
    tally = {}
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            for text_line in handle:
                # split() without arguments already discards leading and
                # trailing whitespace, so no separate strip() is needed.
                for token in text_line.split():
                    tally[token] = tally.get(token, 0) + 1
    except FileNotFoundError:
        print(f"文本文件 {file_path} 未找到。")
    except UnicodeDecodeError:
        print(f"文件 {file_path} 解码错误。")
    return tally


def process_csv(file_path, output_path):
    """Extract the first column of a CSV file, sort it, and write it out.

    Empty rows are skipped. The collected values are sorted as strings and
    written one per row to ``output_path``, which is truncated if it already
    exists. Nothing is written when the input cannot be read.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.
        output_path: Path of the CSV file to create.

    Returns:
        None. Errors are reported on stdout instead of being raised:
        a missing file and a file that is not valid UTF-8.
    """
    try:
        # newline='' hands line-ending handling to the csv module, so line
        # breaks embedded in quoted fields are kept exactly as stored instead
        # of being rewritten by universal-newline translation.
        with open(file_path, 'r', encoding='utf-8', newline='') as source:
            first_column = [row[0] for row in csv.reader(source) if row]
        first_column.sort()
        with open(output_path, 'w', encoding='utf-8', newline='') as target:
            csv.writer(target).writerows([value] for value in first_column)
    except FileNotFoundError:
        # NOTE: this also fires when the directory of output_path does not
        # exist; the message names the input file in either case.
        print(f"CSV文件 {file_path} 未找到。")
    except UnicodeDecodeError:
        # Mirrors the handling in the text-file reader so one badly encoded
        # file does not abort a whole batch run.
        print(f"CSV文件 {file_path} 解码错误。")


def merge_json_info(file_paths, output_path):
    """Collect the ``info`` field of several JSON files into one JSON list.

    Each input is parsed as UTF-8 JSON. When the top-level value is an object
    containing an ``info`` key, that value is appended to the result; files
    whose top-level value is not an object, or that lack the key, contribute
    nothing. Missing, undecodable, or malformed files are reported on stdout
    and skipped. The output file is always written, even if the list is
    empty.

    Args:
        file_paths: Iterable of paths to JSON files.
        output_path: Path of the JSON file to write the merged list to.

    Returns:
        None.
    """
    merged_info = []
    for file_path in file_paths:
        try:
            with open(file_path, 'r', encoding='utf-8') as source:
                document = json.load(source)
        except FileNotFoundError:
            print(f"JSON文件 {file_path} 未找到。")
            continue
        except json.JSONDecodeError:
            print(f"JSON文件 {file_path} 解析错误。")
            continue
        except UnicodeDecodeError:
            # Raised while reading, before the JSON parser runs, when the
            # file is not valid UTF-8.
            print(f"JSON文件 {file_path} 解码错误。")
            continue
        # Valid JSON may be a list, string, number or null at the top level;
        # only objects can carry an 'info' key, and indexing the others with
        # a string would raise TypeError.
        if isinstance(document, dict) and 'info' in document:
            merged_info.append(document['info'])
    with open(output_path, 'w', encoding='utf-8') as target:
        json.dump(merged_info, target, ensure_ascii=False, indent=4)


def main(directory):
    """Walk ``directory`` and process every .txt, .csv and .json file in it.

    * ``.txt`` files: word counts are accumulated across all files and
      printed at the end.
    * ``.csv`` files: each one is passed to ``process_csv`` with the output
      path ``<directory>/processed_csv.csv``.
    * ``.json`` files: their paths are collected and merged in one call to
      ``merge_json_info`` into ``<directory>/merged_json.json``.

    Args:
        directory: Root directory to scan recursively.

    Returns:
        None.
    """
    csv_output_path = os.path.join(directory, 'processed_csv.csv')
    json_output_path = os.path.join(directory, 'merged_json.json')
    # The outputs are written inside the scanned tree. Without this guard a
    # second run would treat the previous run's results as fresh inputs.
    generated_paths = {csv_output_path, json_output_path}

    txt_word_count = {}
    json_file_paths = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            if file_path in generated_paths:
                continue
            if file.endswith('.txt'):
                for word, count in count_words_in_txt(file_path).items():
                    txt_word_count[word] = txt_word_count.get(word, 0) + count
            elif file.endswith('.csv'):
                # NOTE(review): every CSV is sent to the same output path, so
                # if process_csv truncates its output only the last file
                # processed survives. Confirm whether per-file outputs or a
                # combined result is intended.
                process_csv(file_path, csv_output_path)
            elif file.endswith('.json'):
                json_file_paths.append(file_path)
    merge_json_info(json_file_paths, json_output_path)
    print("文本文件单词统计结果:", txt_word_count)


if __name__ == "__main__":
    # Script entry point: prompt for the directory to scan, then process it.
    main(input("请输入目录路径: "))

代码说明：

count_words_in_txt函数：用于统计单个.txt文件中每个单词出现的次数，并处理文件未找到和编码错误的异常。
process_csv函数：从.csv文件中提取第一列数据，排序后写入新的.csv文件，同时处理文件未找到的异常。
merge_json_info函数：从多个.json文件中提取info字段内容并合并到一个新的.json文件，处理文件未找到和JSON解析错误的异常。
main函数：遍历指定目录，对不同类型文件调用相应处理函数，并统计所有.txt文件单词总次数。
if __name__ == "__main__"：程序入口，通过用户输入指定目录路径并执行主函数。

性能优化考虑：

文件读取：使用with open语句确保文件在使用完毕或发生异常时都能被正确关闭，避免资源泄漏。
内存使用：在处理大文件时，尽量避免一次性将整个文件读入内存。例如在处理.txt和.csv文件时，逐行读取数据而不是一次性读取整个文件。需要注意的是，.csv文件的第一列数据为了排序仍会全部保存在内存中。

面试题：Python批量处理不同类型文件内容

知识考点

面试题答案

代码说明：

性能优化考虑：