TypeError:列表索引必须是整数或切片,而不是 str —— 为什么这里的列表索引不是数字?

问题描述 投票:-1回答:1
import requests
from bs4 import BeautifulSoup
import urllib.request
import re

# Read the input file; each stripped line is later passed to requests.get().
with open('crawlingweb.csv')as f:
    content=f.readlines()
    # Drop leading/trailing whitespace (including the newline) from every line.
    content=[x.strip() for x in content]

# NOTE: iterating a list yields its elements, so `i` is a str here,
# not an integer position.
for i in content:
    # content[i] indexes the list with a str, which raises
    # "TypeError: list indices must be integers or slices, not str".
    # Separately, str.replace() returns a new string and the result is not
    # stored, so this line would not change the list even with a valid index.
    content[i].replace('[', '').replace(']', '')
    req = requests.get(content[i])
    html = req.text
    # NOTE(review): every re.sub below takes `html` as input rather than the
    # previous `data`, so only the last assignment reaches print().
    data = re.sub('[^0-9a-zA-Z\\s\\.\\,]', '', string=html).lower()
    data = re.sub('<[^>]*>', '', string=html)
    # Deletes every run of characters that are neither a space nor inside the
    # Hangul ranges listed in the pattern.
    data = re.sub('[^ ㄱ-ㅣ가-힣]+', '', string=html)
    print(data)

`content[i].replace('[', '').replace(']', '')` 这一行报错。我想要的是循环执行这段代码,一次性抓取并打印 content 中的所有地址。

import requests
from bs4 import BeautifulSoup
import urllib.request
import re

# Read the input file; each stripped line is one entry of `content`.
with open('crawlingweb.csv')as f:
    content=f.readlines()
    content=[x.strip() for x in content]
    # NOTE(review): str.replace() returns a new string and the result is not
    # stored, so content[183] is left unchanged by this line.
    content[183].replace('[','').replace(']','')

# Fetch the single entry at integer index 183 (a valid list index, unlike a str).
req = requests.get(content[183])
html = req.text

# NOTE(review): all three substitutions take `html` as input, so the first two
# results are overwritten and only the last one is printed.
data = re.sub('[^0-9a-zA-Z\\s\\.\\,]', '', string=html).lower()
data = re.sub('<[^>]*>','',string=html)
# Deletes every run of characters that are neither a space nor inside the
# Hangul ranges listed in the pattern.
data = re.sub('[^ ㄱ-ㅣ가-힣]+','',string=html)
print(data)

上面这段代码可以正常运行。

谢谢您的阅读

python-3.x web-crawler
1个回答
0
投票

因为

with open('crawlingweb.csv')as f:               # f is the open file object
    content=f.readlines()                       # a list of strings, one per line
    content=[x.strip() for x in content]        # still a list of strings, now stripped

for i in content:               # i is each string element, not an integer index

您想要的是:

# enumerate() yields (integer index, element) pairs, so `index` is a valid
# list index and `line` is the string stored at that position.
for index,line  in enumerate(content):
    # str.replace() returns a new string; store it back into the list.
    content[index] = line.replace('[', '').replace(']', '')

或像这样进一步做:

# Load the file and clean every line in a single pass: strip surrounding
# whitespace, then remove the square brackets.
with open('crawlingweb.csv') as f:
    content = [
        raw.strip().replace('[', '').replace(']', '')
        for raw in f
    ]
© www.soinside.com 2019 - 2024. All rights reserved.