1. 首先安装 BeautifulSoup
pip install BeautifulSoup4
安装成功后,使用命令 pip list 可以查看已安装的版本:
安装bs4.png
2.使用BeautifulSoup
from bs4 import BeautifulSoup
if __name__ == '__main__':
    # Demo: parse a balance-history table and print the timestamp and
    # description found in the first cell of every row.
    #
    # The markup below is the sample input. Each <tr> holds three cells;
    # only the first one (class "span7 word-break") is read here: its
    # <small class="muted"> carries the time, its second <div> the detail.
    html_doc = """
<div id='change-property' class='clear-b'>
<table class=\"table table-bordered table-striped\">
<tbody>
<tr>
<td class=\"span7 word-break\">
<div><small class=\"muted\">30/11/2018 11:56</small></div>
<div>Pengembalian Saldo Tokopedia untuk Transaksi 293647261 (Transaksi kedaluwarsa)</div>
</td>
<td class=\"span2\">
<div><small>Nominal</small> <br> <span class=\"text-debit\">Rp 227</span></div>
</td>
<td class=\"span3\">
<div><small>Balance</small> <br> <strong>Rp 227</strong></div>
</td>
</tr>
<tr>
<td class=\"span7 word-break\">
<div><small class=\"muted\">29/11/2018 07:54</small></div>
<div>Penggunaan Saldo Tokopedia untuk pembelian dari Tokopedia. [ID Transaksi: 383256382]</div>
</td>
<td class=\"span2\">
<div><small>Nominal</small> <br> <span class=\"text-credit\">Rp 227</span></div>
</td>
<td class=\"span3\">
<div><small>Balance</small> <br> <strong>Rp 0</strong></div>
</td>
</tr>
</tbody>
</table>
</div>
"""
    # 'html.parser' ships with Python, so the script runs after installing
    # only beautifulsoup4; 'lxml' would need a separate `pip install lxml`
    # and otherwise raises bs4.FeatureNotFound.
    soup = BeautifulSoup(html_doc, 'html.parser')

    for tr in soup.find_all('tr'):
        # Look the description cell up once and reuse it for both fields.
        cell = tr.find(attrs={'class': "span7 word-break"})
        if cell is None:
            # Row without the expected cell (e.g. a header row): skip it
            # instead of failing on None.
            continue

        stamp = cell.find(attrs={'class': "muted"})
        divs = cell.find_all('div')
        if stamp is None or len(divs) < 2:
            # Cell does not have the time/detail layout: nothing to print.
            continue

        print("time:" + stamp.get_text())
        print("detail:" + divs[1].get_text())
        print()
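运行结果(截图:运行结果.png):
time:30/11/2018 11:56
detail:Pengembalian Saldo Tokopedia untuk Transaksi 293647261 (Transaksi kedaluwarsa)

time:29/11/2018 07:54
detail:Penggunaan Saldo Tokopedia untuk pembelian dari Tokopedia. [ID Transaksi: 383256382]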