Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions _duckdb-stubs/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ class DuckDBPyRelation:
def tf(self) -> dict[str, typing.Any]: ...
def to_csv(
self,
file_name: str,
file_name: str | os.PathLike[str],
*,
sep: str | None = None,
na_rep: str | None = None,
Expand All @@ -728,7 +728,7 @@ class DuckDBPyRelation:
def to_df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ...
def to_parquet(
self,
file_name: str,
file_name: str | os.PathLike[str],
*,
compression: ParquetCompression | None = None,
field_ids: ParquetFieldsOptions | None = None,
Expand Down Expand Up @@ -764,7 +764,7 @@ class DuckDBPyRelation:
) -> DuckDBPyRelation: ...
def write_csv(
self,
file_name: str,
file_name: str | os.PathLike[str],
*,
sep: str | None = None,
na_rep: str | None = None,
Expand All @@ -784,7 +784,7 @@ class DuckDBPyRelation:
) -> None: ...
def write_parquet(
self,
file_name: str,
file_name: str | os.PathLike[str],
*,
compression: ParquetCompression | None = None,
field_ids: ParquetFieldsOptions | None = None,
Expand Down Expand Up @@ -1272,7 +1272,7 @@ def values(*args: IntoValues, connection: DuckDBPyConnection | None = None) -> D
def view(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ...
def write_csv(
df: pandas.DataFrame,
filename: str,
filename: str | os.PathLike[str],
*,
sep: str | None = None,
na_rep: str | None = None,
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ static void InitializeConnectionMethods(nb::module_ &m) {
nb::arg("connection").none() = nb::none());
m.def(
"write_csv",
[](const PandasDataFrame &df, const string &filename, const nb::object &sep = nb::none(),
[](const PandasDataFrame &df, const nb::object &filename, const nb::object &sep = nb::none(),
const nb::object &na_rep = nb::none(), const nb::object &header = nb::none(),
const nb::object &quotechar = nb::none(), const nb::object &escapechar = nb::none(),
const nb::object &date_format = nb::none(), const nb::object &timestamp_format = nb::none(),
Expand Down
3 changes: 3 additions & 0 deletions src/include/duckdb_python/path_like.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ namespace duckdb {

struct DuckDBPyConnection;

// Attempts to interpret 'object' as a file path. A Python str is cast directly; a bytes object or
// any object exposing __fspath__ is decoded through os.fsdecode. On success the decoded path is
// written to 'result' and true is returned; for any other object false is returned.
bool TryDecodePath(const nb::object &object, string &result);
// Decodes 'object' with TryDecodePath and returns the resulting path string.
// Throws an InvalidInputException naming the object's Python type when it is not a str, bytes, or
// os.PathLike object.
string PathToString(const nb::object &object);

struct PathLike {
static PathLike Create(const nb::object &object, DuckDBPyConnection &connection);
// The file(s) extracted from object
Expand Down
4 changes: 2 additions & 2 deletions src/include/duckdb_python/pyrelation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,15 @@ struct DuckDBPyRelation {
std::unique_ptr<DuckDBPyRelation> Join(DuckDBPyRelation *other, const nb::object &condition, const string &type);
std::unique_ptr<DuckDBPyRelation> Cross(DuckDBPyRelation *other);

void ToParquet(const string &filename, const nb::object &compression = nb::none(),
void ToParquet(const nb::object &file_name, const nb::object &compression = nb::none(),
const nb::object &field_ids = nb::none(), const nb::object &row_group_size_bytes = nb::none(),
const nb::object &row_group_size = nb::none(), const nb::object &overwrite = nb::none(),
const nb::object &per_thread_output = nb::none(), const nb::object &use_tmp_file = nb::none(),
const nb::object &partition_by = nb::none(), const nb::object &write_partition_columns = nb::none(),
const nb::object &append = nb::none(), const nb::object &filename_pattern = nb::none(),
const nb::object &file_size_bytes = nb::none());

void ToCSV(const string &filename, const nb::object &sep = nb::none(), const nb::object &na_rep = nb::none(),
void ToCSV(const nb::object &file_name, const nb::object &sep = nb::none(), const nb::object &na_rep = nb::none(),
const nb::object &header = nb::none(), const nb::object &quotechar = nb::none(),
const nb::object &escapechar = nb::none(), const nb::object &date_format = nb::none(),
const nb::object &timestamp_format = nb::none(), const nb::object &quoting = nb::none(),
Expand Down
26 changes: 22 additions & 4 deletions src/path_like.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,33 @@ struct PathLikeProcessor {
vector<string> fs_files;
};

void PathLikeProcessor::AddFile(const nb::object &object) {
bool TryDecodePath(const nb::object &object, string &result) {
if (nb::isinstance<nb::str>(object)) {
all_files.push_back(nb::cast<std::string>(nb::str(object)));
return;
result = nb::cast<string>(object);
return true;
}
if (nb::isinstance<nb::bytes>(object) || nb::hasattr(object, "__fspath__")) {
// A bytes path or an os.PathLike object (e.g. pathlib.Path) - decode it to a string
auto fsdecode = nb::module_::import_("os").attr("fsdecode");
all_files.push_back(nb::cast<std::string>(nb::str(fsdecode(object))));
result = nb::cast<string>(fsdecode(object));
return true;
}
return false;
}

// Converts a Python path argument (str, bytes, or os.PathLike) into a string by delegating to
// TryDecodePath. When the object cannot be decoded as a path, an InvalidInputException is thrown
// whose message includes the Python type name of the offending object.
string PathToString(const nb::object &object) {
	string decoded;
	const bool is_path = TryDecodePath(object, decoded);
	if (is_path) {
		return decoded;
	}
	// Not a path: report the actual Python type so the caller can see what was passed in
	throw InvalidInputException("Expected a str, bytes, or os.PathLike object for the file path, not '%s'",
	                            Py_TYPE(object.ptr())->tp_name);
}

void PathLikeProcessor::AddFile(const nb::object &object) {
string decoded;
if (TryDecodePath(object, decoded)) {
all_files.push_back(std::move(decoded));
return;
}
// This is (assumed to be) a file-like object
Expand Down
18 changes: 11 additions & 7 deletions src/pyrelation.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "duckdb_python/nb/casters.hpp"
#include "duckdb_python/pyrelation.hpp"
#include "duckdb_python/path_like.hpp"
#include "duckdb_python/pyconnection/pyconnection.hpp"
#include "duckdb_python/pytype.hpp"
#include "duckdb_python/pyresult.hpp"
Expand Down Expand Up @@ -1253,12 +1254,14 @@ static Value NestedDictToStruct(const nb::object &dictionary) {
return Value::STRUCT(std::move(children));
}

void DuckDBPyRelation::ToParquet(const string &filename, const nb::object &compression, const nb::object &field_ids,
const nb::object &row_group_size_bytes, const nb::object &row_group_size,
const nb::object &overwrite, const nb::object &per_thread_output,
const nb::object &use_tmp_file, const nb::object &partition_by,
const nb::object &write_partition_columns, const nb::object &append,
const nb::object &filename_pattern, const nb::object &file_size_bytes) {
void DuckDBPyRelation::ToParquet(const nb::object &file_name, const nb::object &compression,
const nb::object &field_ids, const nb::object &row_group_size_bytes,
const nb::object &row_group_size, const nb::object &overwrite,
const nb::object &per_thread_output, const nb::object &use_tmp_file,
const nb::object &partition_by, const nb::object &write_partition_columns,
const nb::object &append, const nb::object &filename_pattern,
const nb::object &file_size_bytes) {
auto filename = PathToString(file_name);
case_insensitive_map_t<vector<Value>> options;

if (!nb::none().is(compression)) {
Expand Down Expand Up @@ -1371,13 +1374,14 @@ void DuckDBPyRelation::ToParquet(const string &filename, const nb::object &compr
PyExecuteRelation(write_parquet);
}

void DuckDBPyRelation::ToCSV(const string &filename, const nb::object &sep, const nb::object &na_rep,
void DuckDBPyRelation::ToCSV(const nb::object &file_name, const nb::object &sep, const nb::object &na_rep,
const nb::object &header, const nb::object &quotechar, const nb::object &escapechar,
const nb::object &date_format, const nb::object &timestamp_format,
const nb::object &quoting, const nb::object &encoding, const nb::object &compression,
const nb::object &overwrite, const nb::object &per_thread_output,
const nb::object &use_tmp_file, const nb::object &partition_by,
const nb::object &write_partition_columns) {
auto filename = PathToString(file_name);
case_insensitive_map_t<vector<Value>> options;

if (!nb::none().is(sep)) {
Expand Down
13 changes: 13 additions & 0 deletions tests/fast/api/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,16 @@ def test_to_csv_use_tmp_file(self):
rel.to_csv(temp_file_name, header=True, use_tmp_file=True)
csv_rel = duckdb.read_csv(temp_file_name, header=True)
assert rel.execute().fetchall() == csv_rel.execute().fetchall()

def test_to_csv_pathlib(self, tmp_path):
    """to_csv accepts a pathlib.Path target and the written rows read back unchanged."""
    target = tmp_path / "test.csv"  # pathlib.Path
    frame = pd.DataFrame({"a": [5, 3, 23, 2], "b": [45, 234, 234, 2]})
    rel = duckdb.from_df(frame)
    rel.to_csv(target)
    # Compare the relation's rows against what read_csv loads from the same Path object.
    original_rows = rel.execute().fetchall()
    reloaded_rows = duckdb.read_csv(target).execute().fetchall()
    assert original_rows == reloaded_rows

def test_to_csv_rejects_non_path(self):
    """to_csv raises InvalidInputException when the target is an int rather than a path."""
    frame = pd.DataFrame({"a": [5, 3, 23, 2], "b": [45, 234, 234, 2]})
    rel = duckdb.from_df(frame)
    not_a_path = 123
    with pytest.raises(duckdb.InvalidInputException):
        rel.to_csv(not_a_path)
13 changes: 13 additions & 0 deletions tests/fast/api/test_to_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,19 @@ def test_use_tmp_file(self):
result = duckdb.read_parquet(temp_file_name)
assert rel.execute().fetchall() == result.execute().fetchall()

def test_to_parquet_pathlib(self, tmp_path):
    """to_parquet accepts a pathlib.Path target and the written rows read back unchanged."""
    target = tmp_path / "test.parquet"  # pathlib.Path
    frame = pd.DataFrame({"a": [5, 3, 23, 2], "b": [45, 234, 234, 2]})
    rel = duckdb.from_df(frame)
    rel.to_parquet(target)
    # Compare the relation's rows against what read_parquet loads from the same Path object.
    original_rows = rel.execute().fetchall()
    reloaded_rows = duckdb.read_parquet(target).execute().fetchall()
    assert original_rows == reloaded_rows

def test_to_parquet_rejects_non_path(self):
    """to_parquet raises InvalidInputException when the target is an int rather than a path."""
    frame = pd.DataFrame({"a": [5, 3, 23, 2], "b": [45, 234, 234, 2]})
    rel = duckdb.from_df(frame)
    not_a_path = 123
    with pytest.raises(duckdb.InvalidInputException):
        rel.to_parquet(not_a_path)

def test_per_thread_output(self):
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) # noqa: PTH118
num_threads = duckdb.sql("select current_setting('threads')").fetchone()[0]
Expand Down
Loading