import requests
from bs4 import BeautifulSoup
import urllib.request
import re
with open('crawlingweb.csv')as f:
content=f.readlines()
content=[x.strip() for x in content]
for i in content:
content[i].replace('[', '').replace(']', '')
req = requests.get(content[i])
html = req.text
data = re.sub('[^0-9a-zA-Z\\s\\.\\,]', '', string=html).lower()
data = re.sub('<[^>]*>', '', string=html)
data = re.sub('[^ ㄱ-ㅣ가-힣]+', '', string=html)
print(data)
content [i] .replace('[',).replace(']','')这行是错误的我想要的是重复此代码,然后一次将内容中的整个地址抓取并打印出来。
import requests
from bs4 import BeautifulSoup
import urllib.request
import re
with open('crawlingweb.csv')as f:
content=f.readlines()
content=[x.strip() for x in content]
content[183].replace('[','').replace(']','')
req = requests.get(content[183])
html = req.text
data = re.sub('[^0-9a-zA-Z\\s\\.\\,]', '', string=html).lower()
data = re.sub('<[^>]*>','',string=html)
data = re.sub('[^ ㄱ-ㅣ가-힣]+','',string=html)
print(data)
此代码有效
谢谢您的阅读
因为
with open('crawlingweb.csv')as f: # this is a file
content=f.readlines() # this is a list of strings
content=[x.strip() for x in content] # this is still a list of strings
for i in content: # i is a string
您想要的是:
for index,line in enumerate(content):
content[index] = line.replace('[', '').replace(']', '')
或像这样进一步做:
with open('crawlingweb.csv')as f:
content=f.readlines()
content=[x.strip().replace('[', '').replace(']', '') for x in content]