初始化
- 使用字符串
pyquery.PyQuery("<html...")
- 使用url
pyquery.PyQuery(url="http://www.baidu.com")
- 使用文件
pyquery.PyQuery(filename="index.html")
开始(以下示例均假定已执行 from pyquery import PyQuery as pq)
html = """
</div>
<a data-movie="playmask" class="z-movie-playlink" target="_blank" href="https://gaoqing.fm/view/7d3a4c10e83c">
<div id="indeximg">
<img onerror=defaultimg(this) alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1">
</div>
<div class="z-movie-playmask" style="visibility: visible;"></div>
</a>
</div>
"""
doc = pq(html)
doc('.z-movie-playlink #indeximg ')
Out: [<div#indeximg>]
嵌套、获取属性
html = """
</div>
<a data-movie="playmask" class="z-movie-playlink" target="_blank" href="https://gaoqing.fm/view/7d3a4c10e83c">
<div id="indeximg">
<img onerror=defaultimg(this) alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1">
</div>
<div class="z-movie-playmask" style="visibility: visible;"></div>
</a>
</div>
"""
doc = pq(html)
items = doc('.z-movie-playlink #indeximg ')
print(items('img').attr('alt'))
print(items('img').attr.alt)
Out:
戈德堡一家 第六季
戈德堡一家 第六季
获取文本
html = """
</div>
<a data-movie="playmask" class="z-movie-playlink" target="_blank" href="https://gaoqing.fm/view/7d3a4c10e83c">
<div id="indeximg">
<img onerror=defaultimg(this) alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1">
<a>小太阳</a>
</div>
<div class="z-movie-playmask" style="visibility: visible;">戈德堡一家</div>
</a>
</div>
"""
doc = pq(html)
items = doc('.z-movie-playlink #indeximg ')
print(items('div').text())
Out:
小太阳
获取html文本
html = """
</div>
<a data-movie="playmask" class="z-movie-playlink" target="_blank" href="https://gaoqing.fm/view/7d3a4c10e83c">
<div id="indeximg">
<img onerror=defaultimg(this) alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1">
<a>小太阳</a>
</div>
<div class="z-movie-playmask" style="visibility: visible;">戈德堡一家</div>
</a>
</div>
"""
doc = pq(html)
items = doc('.z-movie-playlink #indeximg ')
print(items('div'))
print(items('div').html())
Out:
<div id="indeximg">
<img onerror="defaultimg(this)" alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1"/>
<a>小太阳</a>
</div>
<img onerror="defaultimg(this)" alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1"/>
<a>小太阳</a>
DOM操作
修改属性class
html = """
</div>
<a data-movie="playmask" class="z-movie-playlink" target="_blank" href="https://gaoqing.fm/view/7d3a4c10e83c">
<div id="indeximg">
<img onerror=defaultimg(this) alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1">
<a>小太阳</a>
</div>
<div class="z-movie-playmask" style="visibility: visible;">戈德堡一家</div>
</a>
</div>
"""
doc = pq(html)
items = doc('.z-movie-playlink #indeximg ')
items.addClass('flag')
print(items)
items.removeClass('flag')
print(items)
Out:
<div id="indeximg" class="flag">
<img onerror="defaultimg(this)" alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1"/>
<a>小太阳</a>
</div>
<div id="indeximg" class="">
<img onerror="defaultimg(this)" alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1"/>
<a>小太阳</a>
</div>
修改普通属性
html = """
</div>
<a data-movie="playmask" class="z-movie-playlink" target="_blank" href="https://gaoqing.fm/view/7d3a4c10e83c">
<div id="indeximg">
<img onerror=defaultimg(this) alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1">
<a>小太阳</a>
</div>
<div class="z-movie-playmask" style="visibility: visible;">戈德堡一家</div>
</a>
</div>
"""
doc = pq(html)
items = doc('.z-movie-playlink #indeximg ')
items.attr('flag', 'yes')
print(items)
items.attr('flag', 'no')
print(items)
Out:
<div id="indeximg" flag="yes">
<img onerror="defaultimg(this)" alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1"/>
<a>小太阳</a>
</div>
<div id="indeximg" flag="no">
<img onerror="defaultimg(this)" alt="戈德堡一家 第六季" src="https://gaoqing.fm/uploads/2018/7d3a4c10e83c.jpg" data-bd-imgshare-binded="1"/>
<a>小太阳</a>
</div>
参考文档:https://pythonhosted.org/pyquery/