import os
# Characters that are replaced by a single space in every processed file.
bad_chars = [';', ':', '!', "*", "#", "%"]


def remove_bad_chars(text, chars=bad_chars):
    """Return *text* with every character listed in *chars* replaced by a space.

    The original loop called ``fields[j].replace(i, ' ')``: it passed the
    file name ``i`` instead of the bad character and threw the result away
    (``str.replace`` returns a new string), so nothing was ever removed.
    """
    return text.translate(str.maketrans({ch: ' ' for ch in chars}))


def clean_directory(directory, chars=bad_chars):
    """Rewrite every regular file in *directory* with *chars* replaced by spaces.

    Files are read and written as UTF-8. Each file is read completely and
    closed before it is reopened for writing, so the same path is never held
    open in several modes at once.
    """
    for name in os.listdir(directory):
        filepath = os.path.join(directory, name)
        if not os.path.isfile(filepath):
            # Sub-directories cannot be opened as text files; skip them.
            continue
        with open(filepath, "r", encoding="utf8") as source:
            cleaned = remove_bad_chars(source.read(), chars)
        # Write the whole text back at once so the original spacing is kept.
        with open(filepath, "w", encoding="utf8") as target:
            target.write(cleaned)
        print("Resultant list is : ", cleaned)


def main():
    """Clean every file in the ``test`` folder of the current working directory."""
    work_dir = os.getcwd()
    print(work_dir)
    clean_directory(os.path.join(work_dir, "test"))


if __name__ == "__main__":
    main()
我正在尝试从全部 200 个文本文件中删除特殊字符。
下面是我用来查找 bigram(二元词组)的代码:
import nltk
from collections import Counter
# Count the bigrams (pairs of adjacent whitespace-separated words) of the
# input file and append one "<bigram> was found = <n> times" line per
# distinct bigram to Test1.txt.
# NOTE(review): the original indentation was lost; this assumes the counting
# and writing happen once per input line (counts are per line, not per
# file) -- confirm against the intended behavior.
# Both files are managed by a single `with`, so the input handle is closed
# too (it was never closed before) and the output file is opened once
# instead of once per bigram.
with open(r"Aacha_S_001_0_E1800.txt", errors='ignore', encoding='utf8') as fo, \
        open('Test1.txt', 'a', encoding='utf8') as f:
    for line in fo:
        bigmC = Counter(nltk.bigrams(line.split()))
        for key, value in bigmC.items():
            f.write(str(key) + " was found = " + str(value) + " times" + "\n")
print(".............done.............")
我想在一个程序中同时使用这两段代码,先谢谢了。例如,文本为“我的名字叫 eshan”时,输出应逐个列出每个 bigram 及其出现次数(如“我的 名字”出现 1 次,“名字 叫”出现 1 次,依此类推);根据文本内容,出现次数可以大于 1。
import os; dir = os.getcwd(); print(dir); dir1 = os.path.join(dir, "test"); filename = os.listdir(dir1); bad_chars = [';', ':', '!', "*", "#", "%"]; for i in filename: filepath = os.path.join(dir1, i)  # the path ...