DuckDB is an in-memory OLAP database for data analysis.
# with pandas
# Time how long pandas takes to read every CSV under dataset/ and stack them
# into a single DataFrame, then preview the first 10 rows.
# perf_counter() is used instead of time() because it is monotonic and has
# the highest available resolution, so the interval cannot be skewed by
# system clock adjustments.
cur_time = time.perf_counter()
# glob.glob() returns paths in arbitrary (filesystem-dependent) order; sorting
# makes the concatenation order, and therefore the preview, reproducible.
csv_paths = sorted(glob.glob('dataset/*.csv'))
# NOTE: pd.concat raises ValueError if no file matches the pattern.
df = pd.concat([pd.read_csv(path) for path in csv_paths])
print(f"time: {time.perf_counter() - cur_time}")
print(df.head(10))
# with duckdb
# Time how long the query over dataset/*.csv takes, including converting the
# result to a DataFrame via .df(), then print the result.
# NOTE(review): `conn` is presumably a DuckDB connection created earlier in
# the file -- confirm.
# The query itself is limited to 10 rows, so this timing covers fetching a
# 10-row preview rather than loading every row of every file.
# perf_counter() is used instead of time() because it is monotonic and has
# the highest available resolution, so the interval cannot be skewed by
# system clock adjustments.
cur_time = time.perf_counter()
df = conn.execute("""
SELECT *
FROM 'dataset/*.csv'
LIMIT 10
""").df()
print(f"time: {time.perf_counter() - cur_time}")
print(df)