# PyQuery tutorial script.
# Install the library with: pip install pyquery
# The import sits at the top of the file (PEP 8) instead of after the fixture.
from pyquery import PyQuery as pq

# Sample markup reused by the sections that follow: a "wrap" div containing a
# "container" div, which holds a five-item list with varying class names.
html = """
<div class="wrap">
<div id='container'>
<ul class=list>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="boid">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
</div>
"""

# Initialising from a string: hand the markup directly to the constructor.
doc = pq(html)
print(doc("li"))  # select every <li> element by tag name
print('----------------------------------')
# Initialising from a URL: the document is loaded from the given address.
doc = pq(url="http://www.baidu.com")
print(doc("head"))  # print the <head> element
print('----------------------------------')
# Initialising from a file (kept disabled; `filename` is the path of the file):
# doc = pq(filename='xx.html')
# print(doc("li"))
print('----------------------------------')
# Basic CSS selectors: "#" selects by id and "." selects by class, so this
# picks the <li> elements under .list inside #container.
doc = pq(html)
print(doc("#container .list li"))
# Finding elements
#
# Child elements
doc = pq(html)
items = doc(".list")
print(type(items), items, sep="\n")

# find() searches below the current selection with a CSS selector.
lis = items.find("li")
print(type(lis), lis, sep="\n")

# children() returns only the direct children.
lis = items.children()
print(type(lis), lis, sep="\n")

# children(selector) keeps only the direct children whose class is "active".
lis = items.children(".active")
print(lis)
# Parent elements
doc = pq(html)
items = doc(".list")

# parent() returns the direct parent.
container = items.parent()
print(type(container), container, sep="\n")

# parents() returns all ancestor elements.
parents = items.parents()
print(type(parents), parents, sep="\n")

# parents(selector) keeps only the ancestors whose class is "wrap".
parent = items.parents(".wrap")
print(parent)
# Sibling elements
print('---')
doc = pq(html)
# ".item-0.active" (no space between the classes) requires both classes on
# the same element.
li = doc(".list .item-0.active")
print(li.siblings())  # all sibling elements
print(li.siblings(".active"))  # only the siblings whose class is "active"
# Traversal
#
# Single element: the selection can be printed directly.
# `doc` here is the document bound earlier in the script.
li = doc('.item-0.active')
print(li)

# Several elements: items() gives a generator over the matched elements.
doc = pq(html)
lis = doc('li').items()
print(type(lis))
for li in lis:
    # The loop body must be indented; without the indent the file does not
    # compile (IndentationError).
    print(li)
# Getting information
#
# Attributes, text and HTML
doc = pq(html)
a = doc(".item-1.active a")  # the <a> tag inside the matching item
print(a)
print(a.attr("href"))  # read an attribute by calling attr()
print(a.attr.href)  # read the same attribute with attribute-style access
print(a.text())  # get the text

li = doc(".item-0.active")
print(li)
print(li.html())  # get the HTML
# DOM manipulation
#
# addClass / removeClass
doc = pq(html)
li = doc(".item-0.active")
print(li)
li.removeClass("active")  # remove the class
print(li)
li.addClass("active")  # add the class back
print(li)

# attr / css on a freshly parsed document
doc = pq(html)
li = doc(".item-0.active")
print(li)
li.attr("name", "link")  # overwrites the attribute if present, otherwise adds it
li.css("font-size", "14px")  # adds a style entry
print(li)
# remove(): rebind `html` to a smaller document with text followed by a <p>.
html = '''
<div class='wrap'>
Hello,World
<p>This is a paragraoh</p>
</div>
'''
doc = pq(html)
wrap = doc(".wrap")
print(wrap.text())  # text before the removal
wrap.find("p").remove()  # delete the <p> tag
print(wrap.text())  # text after the removal