import polars as pl
import duckdb
# 创建示例 DataFrame
df1 = pl.DataFrame({
"id": [1, 2, 4, 5],
"left_id": [101, 102, 103, 104],
"right_id": [201, 202, 203, 105]
})
df2 = pl.DataFrame({
"id": [101, 102, 103, 104,201, 202, 203, 204],
"idid": [101, 102, 103, 104,201, 202, 203, 204]
})
conn = duckdb.connect(database=':memory:')
# conn.sql("""
# select a.id,a.left_id,b.idid as lidid, a.right_id,c.idid as ridid
# from df1 a,df2 b,df2 c
# where a.left_id=b.id or a.right_id=c.id
# """).show()
t1 = conn.sql("""
select a.id,a.left_id,b.idid as lidid
from df1 a join df2 b on a.left_id=b.id
""")
# conn.sql("""
# select * from t1
# """).show()
t2 = conn.sql("""
select a.id,a.right_id,b.idid as ridid
from df1 a join df2 b on a.right_id=b.id
""")
# conn.sql("""
# select * from t2
# """).show()
conn.sql("""
select a.id,a.left_id,a.lidid,b.right_id,b.ridid
from t1 a join t2 b on a.id=b.id
""").show()
┌───────┬─────────┬───────┬──────────┬───────┐
│ id │ left_id │ lidid │ right_id │ ridid │
│ int64 │ int64 │ int64 │ int64 │ int64 │
├───────┼─────────┼───────┼──────────┼───────┤
│ 1 │ 101 │ 101 │ 201 │ 201 │
│ 2 │ 102 │ 102 │ 202 │ 202 │
│ 4 │ 103 │ 103 │ 203 │ 203 │
└───────┴─────────┴───────┴──────────┴───────┘
conn.sql("""
select a.id,a.left_id,a.lidid,b.right_id,b.ridid
from t1 a full join t2 b on a.id=b.id
""").show()
┌───────┬─────────┬───────┬──────────┬───────┐
│ id │ left_id │ lidid │ right_id │ ridid │
│ int64 │ int64 │ int64 │ int64 │ int64 │
├───────┼─────────┼───────┼──────────┼───────┤
│ 1 │ 101 │ 101 │ 201 │ 201 │
│ 2 │ 102 │ 102 │ 202 │ 202 │
│ 4 │ 103 │ 103 │ 203 │ 203 │
│ 5 │ 104 │ 104 │ NULL │ NULL │
└───────┴─────────┴───────┴──────────┴───────┘
conn.sql("""
select a.id,a.left_id,b.idid as lidid, a.right_id,c.idid as ridid
from df1 a,df2 b,df2 c
where a.left_id=b.id and a.right_id=c.id
""").show()
┌───────┬─────────┬───────┬──────────┬───────┐
│ id │ left_id │ lidid │ right_id │ ridid │
│ int64 │ int64 │ int64 │ int64 │ int64 │
├───────┼─────────┼───────┼──────────┼───────┤
│ 1 │ 101 │ 101 │ 201 │ 201 │
│ 2 │ 102 │ 102 │ 202 │ 202 │
│ 4 │ 103 │ 103 │ 203 │ 203 │
└───────┴─────────┴───────┴──────────┴───────┘
conn.sql("""
select a.id,a.left_id,b.idid as lidid, a.right_id,c.idid as ridid
from df1 a
join df2 b on a.left_id=b.id
join df2 c on a.right_id=c.id
""").show()
┌───────┬─────────┬───────┬──────────┬───────┐
│ id │ left_id │ lidid │ right_id │ ridid │
│ int64 │ int64 │ int64 │ int64 │ int64 │
├───────┼─────────┼───────┼──────────┼───────┤
│ 1 │ 101 │ 101 │ 201 │ 201 │
│ 2 │ 102 │ 102 │ 202 │ 202 │
│ 4 │ 103 │ 103 │ 203 │ 203 │
└───────┴─────────┴───────┴──────────┴───────┘
conn.sql("""
select a.id,a.left_id,b.idid as lidid, a.right_id,c.idid as ridid
from df1 a
inner join df2 b on a.left_id=b.id
inner join df2 c on a.right_id=c.id
""").show()
┌───────┬─────────┬───────┬──────────┬───────┐
│ id │ left_id │ lidid │ right_id │ ridid │
│ int64 │ int64 │ int64 │ int64 │ int64 │
├───────┼─────────┼───────┼──────────┼───────┤
│ 1 │ 101 │ 101 │ 201 │ 201 │
│ 2 │ 102 │ 102 │ 202 │ 202 │
│ 4 │ 103 │ 103 │ 203 │ 203 │
└───────┴─────────┴───────┴──────────┴───────┘
COALESCE
conn.sql("""
select COALESCE(a.id, b.id) AS id,a.left_id,a.lidid,b.right_id,b.ridid
from t1 a full join t2 b on a.id=b.id
""").show()
pre,cur,next
import polars as pl
import duckdb
# 创建示例 DataFrame
df1 = pl.DataFrame({
"id": [1, 2, 4, 5],
"in_id": [101, 102, 103, 104],
"out_id": [102, 103, 104, 105]
})
df2 = pl.DataFrame({
"id": [101, 102, 103, 104,105, 106, 107, 108],
"idid": [101, 102, 103, 104,201, 202, 203, 204]
})
conn = duckdb.connect(database=':memory:')
conn.sql("""
select b.in_id as pre_id,a.id as cur_id,c.out_id as next_id
from df2 a, df1 b,df1 c
where a.id=b.out_id and a.id=c.in_id
""").show()
┌────────┬────────┬─────────┐
│ pre_id │ cur_id │ next_id │
│ int64 │ int64 │ int64 │
├────────┼────────┼─────────┤
│ 101 │ 102 │ 103 │
│ 102 │ 103 │ 104 │
│ 103 │ 104 │ 105 │
└────────┴────────┴─────────┘
DISTINCT ON
Clause
t2 = duckdb_conn.sql(f"""
select distinct on (lane_id,lbound_id) * from t1
""")