apache · guan404ming · Jan 12, 2026 · Jan 12, 2026
diff --git a/qdp/qdp-python/README.md b/qdp/qdp-python/README.md
@@ -15,11 +15,11 @@ engine = QdpEngine(0)
 
 # Encode data from Python list
 data = [0.5, 0.5, 0.5, 0.5]
-dlpack_ptr = engine.encode(data, num_qubits=2, encoding_method="amplitude")
+qtensor = engine.encode(data, num_qubits=2, encoding_method="amplitude")
 
 # Or encode from file formats
-tensor_parquet = engine.encode_from_parquet("data.parquet", 10, "amplitude")
-tensor_arrow = engine.encode_from_arrow_ipc("data.arrow", 10, "amplitude")
+tensor_parquet = engine.encode("data.parquet", 10, "amplitude")
+tensor_arrow = engine.encode("data.arrow", 10, "amplitude")
 ```
 
 ## Build from source
@@ -41,8 +41,9 @@ uv run maturin develop
 
 ## File format support
 
-- **Parquet** - `encode_from_parquet(path, num_qubits, encoding_method)`
-- **Arrow IPC** - `encode_from_arrow_ipc(path, num_qubits, encoding_method)`
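+- **Parquet** - `.parquet` files (pass the file path to `engine.encode`)
+- **Arrow IPC** - `.arrow` or `.feather` files (pass the file path to `engine.encode`)
+- **NumPy** - `.npy` files (pass the file path to `engine.encode`)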
 
 ## Adding new bindings
 

diff --git a/qdp/qdp-python/benchmark/benchmark_e2e.py b/qdp/qdp-python/benchmark/benchmark_e2e.py
@@ -267,23 +267,23 @@ def run_mahout_parquet(engine, n_qubits, n_samples):
 
     # Direct Parquet to GPU pipeline
     parquet_encode_start = time.perf_counter()
-    batched_tensor = engine.encode_from_parquet(DATA_FILE, n_qubits, "amplitude")
+    qtensor = engine.encode(DATA_FILE, n_qubits)
     parquet_encode_time = time.perf_counter() - parquet_encode_start
     print(f"  Parquet->GPU (IO+Encode): {parquet_encode_time:.4f} s")
 
-    # Convert to torch tensor (single DLPack call)
+    # Convert to torch tensor
     dlpack_start = time.perf_counter()
-    gpu_batched = torch.from_dlpack(batched_tensor)
+    gpu_batched = torch.from_dlpack(qtensor)
     dlpack_time = time.perf_counter() - dlpack_start
     print(f"  DLPack conversion: {dlpack_time:.4f} s")
 
-    # Tensor is already 2D [n_samples, state_len] from to_dlpack()
+    # Tensor is already 2D [n_samples, state_len]
     state_len = 1 << n_qubits
     assert gpu_batched.shape == (n_samples, state_len), (
         f"Expected shape ({n_samples}, {state_len}), got {gpu_batched.shape}"
     )
 
-    # Convert to float for model (batch already on GPU)
+    # Take magnitudes and convert to float32 for the model
     reshape_start = time.perf_counter()
     gpu_all_data = gpu_batched.abs().to(torch.float32)
     reshape_time = time.perf_counter() - reshape_start
@@ -318,16 +318,16 @@ def run_mahout_arrow(engine, n_qubits, n_samples):
     start_time = time.perf_counter()
 
     arrow_encode_start = time.perf_counter()
-    batched_tensor = engine.encode_from_arrow_ipc(ARROW_FILE, n_qubits, "amplitude")
+    qtensor = engine.encode(ARROW_FILE, n_qubits)
     arrow_encode_time = time.perf_counter() - arrow_encode_start
     print(f"  Arrow->GPU (IO+Encode): {arrow_encode_time:.4f} s")
 
     dlpack_start = time.perf_counter()
-    gpu_batched = torch.from_dlpack(batched_tensor)
+    gpu_batched = torch.from_dlpack(qtensor)
     dlpack_time = time.perf_counter() - dlpack_start
     print(f"  DLPack conversion: {dlpack_time:.4f} s")
 
-    # Tensor is already 2D [n_samples, state_len] from to_dlpack()
+    # Tensor is already 2D [n_samples, state_len]
     state_len = 1 << n_qubits
     assert gpu_batched.shape == (n_samples, state_len), (
         f"Expected shape ({n_samples}, {state_len}), got {gpu_batched.shape}"

diff --git a/qdp/qdp-python/benchmark/benchmark_latency.py b/qdp/qdp-python/benchmark/benchmark_latency.py
@@ -136,7 +136,7 @@ def run_mahout(num_qubits: int, total_batches: int, batch_size: int, prefetch: i
 
     for batch in prefetched_batches(total_batches, batch_size, vector_len, prefetch):
         normalized = normalize_batch(batch)
-        qtensor = engine.encode_batch(normalized, num_qubits, "amplitude")
+        qtensor = engine.encode(normalized, num_qubits, "amplitude")
         _ = torch.utils.dlpack.from_dlpack(qtensor)
         processed += normalized.shape[0]
 

diff --git a/qdp/qdp-python/benchmark/benchmark_numpy_io.py b/qdp/qdp-python/benchmark/benchmark_numpy_io.py
@@ -79,8 +79,8 @@ def run_mahout_numpy(num_qubits: int, num_samples: int, npy_path: str):
     start_total = time.perf_counter()
 
     try:
-        # Use the NumPy reader API
-        qtensor = engine.encode_from_numpy(npy_path, num_qubits, "amplitude")
+        # Use the unified encode API with a .npy file path
+        qtensor = engine.encode(npy_path, num_qubits, "amplitude")
         tensor = torch.utils.dlpack.from_dlpack(qtensor)
 
         # Small computation to ensure GPU has processed the data

diff --git a/qdp/qdp-python/benchmark/benchmark_throughput.py b/qdp/qdp-python/benchmark/benchmark_throughput.py
@@ -127,8 +127,8 @@ def run_mahout(num_qubits: int, total_batches: int, batch_size: int, prefetch: i
         total_batches, batch_size, 1 << num_qubits, prefetch
     ):
         normalized = np.ascontiguousarray(normalize_batch(batch), dtype=np.float64)
-        qtensor = engine.encode_batch(normalized, num_qubits, "amplitude")
-        tensor = torch.utils.dlpack.from_dlpack(qtensor).abs().to(torch.float32)
+        qtensor = engine.encode(normalized, num_qubits)
+        tensor = torch.from_dlpack(qtensor).abs().to(torch.float32)
         _ = tensor.sum()
         processed += normalized.shape[0]