pandas
- 什么是pandas
官网: pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
- 常用数据类型
- Series 一维, 带标签的数组
- DataFrame 二维, Series 容器
import pandas as pd
import string
import numpy as np
# Series: a one-dimensional labelled array. The values 0-9 are paired with
# the index labels 'A'-'J' (the first ten uppercase letters).
t = pd.Series(data=np.arange(10), index=list(string.ascii_uppercase)[:10])
t
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int64
# Check which class the object created above belongs to.
type(t)
pandas.core.series.Series
# Map each of the first ten uppercase letters to its position:
# {'A': 0, 'B': 1, ..., 'J': 9}.
a = {letter: pos for pos, letter in enumerate(string.ascii_uppercase[:10])}
a
{'A': 0,
'B': 1,
'C': 2,
'D': 3,
'E': 4,
'F': 5,
'G': 6,
'H': 7,
'I': 8,
'J': 9}
# A dict can be passed directly: its keys become the index labels and its
# values become the data.
pd.Series(a)
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int64
# Request the labels 'F'-'O'. Only 'F'-'J' exist in `a`, so the remaining
# labels ('K'-'O') have no value and show up as NaN.
pd.Series(a, index=list(string.ascii_uppercase[5:15]))
# The dtype becomes float64 because NumPy's NaN is a float.
F 5.0
G 6.0
H 7.0
I 8.0
J 9.0
K NaN
L NaN
M NaN
N NaN
O NaN
dtype: float64
# Display the Series again before the indexing examples.
t
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int64
# Positional slice: start at position 2, stop before position 10, step 2.
t[2:10:2]
C 2
E 4
G 6
I 8
dtype: int64
# Select the element at position 1. `.iloc` is explicit positional access;
# plain `t[1]` on a string-labelled Series relied on an integer-as-position
# fallback that recent pandas releases deprecate.
t.iloc[1]
1
# Select several elements by position. `.iloc` makes the positional intent
# explicit; plain `t[[2, 3, 6]]` on a string-labelled Series relied on an
# integer-as-position fallback that recent pandas releases deprecate.
t.iloc[[2, 3, 6]]
C 2
D 3
G 6
dtype: int64
# Boolean indexing: keep only the entries whose value is greater than 4.
t[t>4]
F 5
G 6
H 7
I 8
J 9
dtype: int64
# Select a single value by its index label.
t["F"]
5
# 'g' is not one of the index labels. Passing a list that contains a missing
# label to [] / .loc is deprecated and raises KeyError in current pandas;
# .reindex() is the supported alternative and fills missing labels with NaN.
t.reindex(["A", "F", "g"])
/anaconda3/lib/python3.6/site-packages/pandas/core/series.py:851: FutureWarning:
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.
See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
return self.loc[key]
A 0.0
F 5.0
g NaN
dtype: float64
# The index labels of the Series.
t.index
Index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'], dtype='object')
# The data of the Series as a NumPy array.
t.values
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# The index is its own pandas object type.
type(t.index)
pandas.core.indexes.base.Index
# The values are stored as a NumPy ndarray.
type(t.values)
numpy.ndarray
pandas之DataFrame
- 通过剪贴板创建DataFrame
# Open the TIOBE index page in a web browser. Presumably a table is then
# copied by hand so that the clipboard-based example below has data to read.
import webbrowser
link = "https://www.tiobe.com/tiobe-index/"
webbrowser.open(link)
True
# Read whatever text is currently on the system clipboard and parse it into
# a DataFrame; the result depends entirely on what was copied beforehand.
df = pd.read_clipboard()
df
# DataFrame from a 3x4 NumPy array. No labels are given, so both rows and
# columns get default integer indexes. Note that `t` is rebound here from
# the earlier Series to a DataFrame.
t = pd.DataFrame(data=np.arange(12).reshape(3, 4))
t
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>0</th>
<th>1</th>
<th>2</th>
<th>3</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>0</td>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<th>1</th>
<td>4</td>
<td>5</td>
<td>6</td>
<td>7</td>
</tr>
<tr>
<th>2</th>
<td>8</td>
<td>9</td>
<td>10</td>
<td>11</td>
</tr>
</tbody>
</table>
</div>
- DataFrame对象既有行索引,又有列索引
- 行索引,表明不同行,横向索引,叫index,0轴,axis=0
- 列索引,表明不同列,纵向索引,叫columns,1轴,axis=1
# The same 3x4 data with explicit labels: rows 'a'-'c' (index, axis 0)
# and columns 'W'-'Z' (columns, axis 1).
t1 = pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    index=list(string.ascii_lowercase[:3]),
    columns=list(string.ascii_uppercase[-4:]),
)
t1
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>W</th>
<th>X</th>
<th>Y</th>
<th>Z</th>
</tr>
</thead>
<tbody>
<tr>
<th>a</th>
<td>0</td>
<td>1</td>
<td>2</td>
<td>3</td>
</tr>
<tr>
<th>b</th>
<td>4</td>
<td>5</td>
<td>6</td>
<td>7</td>
</tr>
<tr>
<th>c</th>
<td>8</td>
<td>9</td>
<td>10</td>
<td>11</td>
</tr>
</tbody>
</table>
</div>
- 通过传入字典创建
# Build a DataFrame from a dict of equal-length lists: each key becomes a
# column and each list position becomes a row.
d1 = {
    "name": ["xiaoming", "xiaogang"],
    "age": [20, 22],
    "tel": [10086, 10010],
}
t2 = pd.DataFrame(data=d1)
t2
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>name</th>
<th>age</th>
<th>tel</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>xiaoming</td>
<td>20</td>
<td>10086</td>
</tr>
<tr>
<th>1</th>
<td>xiaogang</td>
<td>22</td>
<td>10010</td>
</tr>
</tbody>
</table>
</div>
# Check which class the object built from the dict belongs to.
type(t2)
pandas.core.frame.DataFrame
# Build a DataFrame from a list of dicts: each dict is one row.
# Keys that a row does not provide are filled with NaN.
d2 = [
    {"name": "xiaoming", "age": 33, "tel": 10010},
    {"name": "xiaohong", "tel": 10010},
    {"name": "xiaowang", "age": 19},
]
t3 = pd.DataFrame(data=d2)
t3
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>age</th>
<th>name</th>
<th>tel</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>33.0</td>
<td>xiaoming</td>
<td>10010.0</td>
</tr>
<tr>
<th>1</th>
<td>NaN</td>
<td>xiaohong</td>
<td>10010.0</td>
</tr>
<tr>
<th>2</th>
<td>19.0</td>
<td>xiaowang</td>
<td>NaN</td>
</tr>
</tbody>
</table>
</div>
属性
t3.index
# Row index (the labels along axis 0).
RangeIndex(start=0, stop=3, step=1)
t3.columns
# Column index (the labels along axis 1).
Index(['age', 'name', 'tel'], dtype='object')
t3.values
# The underlying data as a NumPy ndarray.
array([[33.0, 'xiaoming', 10010.0],
[nan, 'xiaohong', 10010.0],
[19.0, 'xiaowang', nan]], dtype=object)
# Shape as (number of rows, number of columns).
t3.shape
(3, 3)
# Data type of each column.
t3.dtypes
age float64
name object
tel float64
dtype: object
# Number of dimensions of the data.
t3.ndim
2