From 41b22efd8289976ac5b70cbdc118a84ea3619de6 Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 12 Oct 2021 11:57:30 +0200 Subject: [PATCH 01/32] Benchmark with HPy 0.0.3 and PyPy3 7.3.6 --- README.md | 63 +++++++++++++++++------------------ bench/bench.jl | 4 +-- bench/bench_cpy_vs_hpy.py | 57 ++++++++++++++++++++++++------- bench/make_bench_piconumpy.py | 9 ++--- 4 files changed, 83 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index a0bad3d..6b2a908 100644 --- a/README.md +++ b/README.md @@ -104,19 +104,13 @@ pypy -m pip install pip -U pypy -m pip install numpy cython pytest transonic pythran ``` -We need to install the correct version of HPy for the version of PyPy we are using: +One can check which HPy version is vendored with PyPy: ```bash pypy -c "import hpy.universal as u; print(u.get_version())" ``` -gives `('0.0.2rc2.dev12+gc9660c2', 'c9660c2')`. - -```bash -cd ~/Dev/hpy -# update to the correct commit -pypy setup.py develop -``` +gives `('0.0.3', '2196f14')`. Now we can build-install PicoNumpy: @@ -136,36 +130,36 @@ make ## Few results -As of today (6 July 2021), HPy is not yet ready for high performance, but at -least (with HPy 0.0.2) it runs ! +As of today (12 October 2021), HPy is not yet ready for high performance, but at +least (with HPy 0.0.3) it runs ! 
### At home (Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz) - With CPython ``` -Julia : 1 * norm = 0.00196 s -PicoNumpy (CPython C-API) : 9.42 * norm -PicoNumpy (HPy CPy ABI) : 9.95 * norm -PicoNumpy (HPy Universal) : 10.4 * norm -Transonic-Pythran : 0.497 * norm -Numpy : 27.5 * norm -PicoNumpy (purepy) : 37.3 * norm -PicoNumpy (purepy_array) : 37.7 * norm -PicoNumpy (Cython) : 28.9 * norm +Julia : 1 * norm = 0.0171 s +PicoNumpy (CPython C-API) : 11.1 * norm +PicoNumpy (HPy CPy ABI) : 11.6 * norm +PicoNumpy (HPy Universal) : 12.1 * norm +Transonic-Pythran : 0.537 * norm +Numpy : 33.8 * norm +PicoNumpy (purepy) : 43.7 * norm +PicoNumpy (purepy_array) : 44.8 * norm +PicoNumpy (Cython) : 33.9 * norm ``` - With PyPy3 ``` -Julia : 1 * norm = 0.00196 s -PicoNumpy (CPython C-API) : 34.1 * norm -PicoNumpy (HPy Universal) : 12.8 * norm -Transonic-Pythran : 0.539 * norm -Numpy : 232 * norm -PicoNumpy (purepy) : 4.39 * norm -PicoNumpy (purepy_array) : 6.33 * norm -PicoNumpy (Cython) : 274 * norm +Julia : 1 * norm = 0.0171 s +PicoNumpy (CPython C-API) : 39.2 * norm +PicoNumpy (HPy Universal) : 13.1 * norm +Transonic-Pythran : 0.562 * norm +Numpy : 286 * norm +PicoNumpy (purepy) : 5.59 * norm +PicoNumpy (purepy_array) : 7.41 * norm +PicoNumpy (Cython) : 282 * norm ``` #### Simpler benchmarks (bench/bench_cpy_vs_hpy.py) @@ -173,14 +167,19 @@ PicoNumpy (Cython) : 274 * norm - With CPython ``` -CPython C-API: 1.92 seconds -HPy [Universal]: 2.08 seconds -HPy [CPy ABI]: 2.02 seconds +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=6, releaselevel='final', serial=0)} +CPython C-API: 0.193 seconds (11.2 * Julia) +HPy [Universal]: 0.208 seconds (12.1 * Julia) +HPy [CPy ABI]: 0.201 seconds (11.7 * Julia) ``` - With PyPy3 ``` -CPython C-API: 5.75 seconds -HPy [Universal]: 2.11 seconds +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=6, releaselevel='final', serial=0)} +CPython C-API: 0.592 seconds (34.6 * Julia) +HPy 
[Universal]: 0.207 seconds (12.1 * Julia) +Python list: 0.093 seconds ( 5.4 * Julia) ``` diff --git a/bench/bench.jl b/bench/bench.jl index 00cedff..c9d08ef 100644 --- a/bench/bench.jl +++ b/bench/bench.jl @@ -65,10 +65,10 @@ function bench(n_sleds, n_time) end -n_sleds = 10 +n_sleds = 100 n_time = 200 -nb_runs = 200 +nb_runs = 50 times = zeros(nb_runs) diff --git a/bench/bench_cpy_vs_hpy.py b/bench/bench_cpy_vs_hpy.py index e54ad54..c97f3d8 100644 --- a/bench/bench_cpy_vs_hpy.py +++ b/bench/bench_cpy_vs_hpy.py @@ -1,8 +1,9 @@ import sys -import time +from time import perf_counter import random from math import pi, cos, sin from pathlib import Path +from pprint import pprint here = Path(__file__).absolute().parent @@ -75,14 +76,18 @@ def bench(mod, n_sleds, n_time): u_init = mod.zeros(n_sleds) for i in range(n_sleds): u_init[i] += 3.5 - start = time.time() - solver(mod, board, x_init, y_init, u_init, v_init, 0.01, n_time) - end = time.time() - return end - start + times = [] + for _ in range(20): + start = perf_counter() + solver(mod, board, x_init, y_init, u_init, v_init, 0.01, n_time) + times.append(perf_counter() - start) + + times.sort() + return times[len(times) // 2] N_SLEDS = 100 -N_TIME = 2000 +N_TIME = 200 def import_piconumpy_hpy_universal(): @@ -97,18 +102,46 @@ def main(): import piconumpy._piconumpy_cpython_capi as pnp_capi - t = bench(pnp_capi, N_SLEDS, N_TIME) - print(f"CPython C-API: {t:.2f} seconds") + pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")}) + + tmp_result_julia = Path("tmp_result_julia.txt") + if tmp_result_julia.exists(): + with open("tmp_result_julia.txt") as file: + norm = float(file.read()) + end = "" + else: + norm = False + end = "\n" + + t_capi = bench(pnp_capi, N_SLEDS, N_TIME) + print(f"CPython C-API: {t_capi:.3f} seconds", end=end) + if norm: + print(f" ({t_capi/norm:.1f} * Julia)") pnp_hpy_universal = import_piconumpy_hpy_universal() - t = bench(pnp_hpy_universal, N_SLEDS, N_TIME) - 
print(f"HPy [Universal]: {t:.2f} seconds") + t_hpy_univ = bench(pnp_hpy_universal, N_SLEDS, N_TIME) + print(f"HPy [Universal]: {t_hpy_univ:.3f} seconds", end=end) + + if norm: + print(f" ({t_hpy_univ/norm:.1f} * Julia)") if not IS_PYPY: import piconumpy._piconumpy_hpy as pnp_hpy - t = bench(pnp_hpy, N_SLEDS, N_TIME) - print(f"HPy [CPy ABI]: {t:.2f} seconds") + t_hpy_cpy_abi = bench(pnp_hpy, N_SLEDS, N_TIME) + print(f"HPy [CPy ABI]: {t_hpy_cpy_abi:.3f} seconds", end=end) + + if norm: + print(f" ({t_hpy_cpy_abi/norm:.1f} * Julia)") + + if IS_PYPY: + import piconumpy.purepy as pnp_with_list + + t_with_list = bench(pnp_with_list, N_SLEDS, N_TIME) + print(f"Python list: {t_with_list:.3f} seconds", end=end) + + if norm: + print(f" ({t_with_list/norm:4.1f} * Julia)") if __name__ == "__main__": diff --git a/bench/make_bench_piconumpy.py b/bench/make_bench_piconumpy.py index c15b3f6..c1a92d0 100644 --- a/bench/make_bench_piconumpy.py +++ b/bench/make_bench_piconumpy.py @@ -75,12 +75,12 @@ def create_tmp_file(name_module): name = fmt_name.format("Julia") print(f"{name}: 1 * norm = {norm:4.3g} s") -n_sleds = 10 +n_sleds = 100 n_time = 200 g = locals() -def timeit(name_func, name): +def timeit(name_func, name, total_duration=2): return timeit_verbose( name_func + "(n_sleds, n_time)", globals=g, @@ -88,6 +88,7 @@ def timeit(name_func, name): print_time=False, norm=norm, max_length_name=max_length_name, + total_duration=total_duration, ) timeit("bench", name="PicoNumpy (CPython C-API)") @@ -95,14 +96,14 @@ def timeit(name_func, name): timeit("bench_hpy", name="PicoNumpy (HPy CPy ABI)") timeit("bench_hpy_universal", name="PicoNumpy (HPy Universal)") timeit("bench_pythran", name="Transonic-Pythran") -timeit("bench_numpy", name="Numpy") +timeit("bench_numpy", name="Numpy", total_duration=4) timeit( "bench_piconumpy_purepy", name="PicoNumpy (purepy)", ) timeit( "bench_piconumpy_purepy_array", name="PicoNumpy (purepy_array)", ) -timeit("bench_cython", name="PicoNumpy (Cython)") 
+timeit("bench_cython", name="PicoNumpy (Cython)", total_duration=4) """ ) From ba809f3dc55a249537a275013418f5c74d72b31c Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 12 Oct 2021 12:07:44 +0200 Subject: [PATCH 02/32] CI: use HPy 0.0.3 --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8083adf..3385e2c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,9 +24,8 @@ jobs: - name: Install dependencies run: | - git clone -b master --single-branch https://github.com/hpyproject/hpy + git clone -b release/0.0.3 --single-branch https://github.com/hpyproject/hpy cd hpy - git checkout 7b45ce522 pip install . pip install numpy cython pytest transonic pythran @@ -47,5 +46,6 @@ jobs: - name: Run bench run: | cd bench + make tmp_result_julia.txt make bench_hpy make From 9e5aae731f104ab9c53a379649452a6e58cb7ed4 Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 12 Oct 2021 13:56:15 +0200 Subject: [PATCH 03/32] CI: pypy-3.7-nightly --- .github/workflows/tests.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3385e2c..7e1601d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [3.7, 3.8, 3.9, pypy-3.7-nightly] steps: @@ -22,11 +22,15 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - if: startsWith(matrix.python-version, 'pypy') != true + name: Install HPy (only for CPython) run: | git clone -b release/0.0.3 --single-branch https://github.com/hpyproject/hpy cd hpy pip install . 
+ + - name: Install dependencies + run: | pip install numpy cython pytest transonic pythran - name: Checkout From d3025c54652a73ce519a0db7921ca2dc15899399 Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 12 Oct 2021 14:49:33 +0200 Subject: [PATCH 04/32] xfail 2 tests for PyPy --- piconumpy/test_cpython_capi.py | 1 + piconumpy/test_hpy_universal.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/piconumpy/test_cpython_capi.py b/piconumpy/test_cpython_capi.py index a1638dc..cedbed5 100644 --- a/piconumpy/test_cpython_capi.py +++ b/piconumpy/test_cpython_capi.py @@ -6,6 +6,7 @@ class Tests: piconumpy = _piconumpy_cpython_capi + def _array(self, *args): return self.piconumpy.array(*args) diff --git a/piconumpy/test_hpy_universal.py b/piconumpy/test_hpy_universal.py index fbf5ce4..2a470ca 100644 --- a/piconumpy/test_hpy_universal.py +++ b/piconumpy/test_hpy_universal.py @@ -1,3 +1,5 @@ +import sys + import pytest from .util_hpy import import_ext @@ -15,3 +17,15 @@ ) class TestsCPyABI(_Tests): piconumpy = piconumpy_universal + + def test_multiply(self): + if sys.implementation.name == "pypy": + pytest.xfail("Expected failure with PyPy (but should work)") + + super().test_multiply() + + def test_add(self): + if sys.implementation.name == "pypy": + pytest.xfail("Expected failure with PyPy (but should work)") + + super().test_add() From 0b809bc2f51b3063782260851892aba46e1d97f3 Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 12 Oct 2021 15:04:20 +0200 Subject: [PATCH 05/32] rm piconumpy/_piconumpy_hpy.py --- .github/workflows/tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7e1601d..d989a0c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -42,10 +42,11 @@ jobs: run: | python setup.py develop python setup.py --hpy-abi=universal develop + rm -f piconumpy/_piconumpy_hpy.py - name: Run tests run: | - pytest -s + pytest -v - name: Run 
bench run: | From f788f1a04d01993c982681f58953ef9fad80219f Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 12 Oct 2021 15:22:11 +0200 Subject: [PATCH 06/32] Skip too long benchmarks --- bench/make_bench_piconumpy.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bench/make_bench_piconumpy.py b/bench/make_bench_piconumpy.py index c1a92d0..b02433f 100644 --- a/bench/make_bench_piconumpy.py +++ b/bench/make_bench_piconumpy.py @@ -96,14 +96,20 @@ def timeit(name_func, name, total_duration=2): timeit("bench_hpy", name="PicoNumpy (HPy CPy ABI)") timeit("bench_hpy_universal", name="PicoNumpy (HPy Universal)") timeit("bench_pythran", name="Transonic-Pythran") -timeit("bench_numpy", name="Numpy", total_duration=4) +try: + timeit("bench_numpy", name="Numpy", total_duration=8) +except RuntimeError: + print("Skip bench_numpy because it's too slow") timeit( "bench_piconumpy_purepy", name="PicoNumpy (purepy)", ) timeit( "bench_piconumpy_purepy_array", name="PicoNumpy (purepy_array)", ) -timeit("bench_cython", name="PicoNumpy (Cython)", total_duration=4) +try: + timeit("bench_cython", name="PicoNumpy (Cython)", total_duration=8) +except RuntimeError: + print("Skip bench_cython because it's too slow") """ ) From 6f1d5119f79675f5b81d261721a4a9c2b18ee9e2 Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 12 Oct 2021 15:25:49 +0200 Subject: [PATCH 07/32] rerun bench_hpy --- .github/workflows/tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d989a0c..eff1152 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -54,3 +54,5 @@ jobs: make tmp_result_julia.txt make bench_hpy make + # let's rerun bench_hpy to get these results also at the end + make bench_hpy From aa6f146860e02aa8a4a4ddb89ac4d5e0c7a78c1c Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 12 Oct 2021 16:03:28 +0200 Subject: [PATCH 08/32] Small improvements --- bench/bench.jl | 2 +- 
bench/bench_cpy_vs_hpy.py | 7 ++++--- bench/make_bench_piconumpy.py | 3 +++ bench/profile_piconumpy.py | 2 ++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/bench/bench.jl b/bench/bench.jl index c9d08ef..bd98571 100644 --- a/bench/bench.jl +++ b/bench/bench.jl @@ -68,7 +68,7 @@ end n_sleds = 100 n_time = 200 -nb_runs = 50 +nb_runs = 200 times = zeros(nb_runs) diff --git a/bench/bench_cpy_vs_hpy.py b/bench/bench_cpy_vs_hpy.py index c97f3d8..38f1ba1 100644 --- a/bench/bench_cpy_vs_hpy.py +++ b/bench/bench_cpy_vs_hpy.py @@ -109,6 +109,7 @@ def main(): with open("tmp_result_julia.txt") as file: norm = float(file.read()) end = "" + print(f"Julia: {norm:.3f} seconds") else: norm = False end = "\n" @@ -116,14 +117,14 @@ def main(): t_capi = bench(pnp_capi, N_SLEDS, N_TIME) print(f"CPython C-API: {t_capi:.3f} seconds", end=end) if norm: - print(f" ({t_capi/norm:.1f} * Julia)") + print(f" ({t_capi/norm:4.1f} * Julia)") pnp_hpy_universal = import_piconumpy_hpy_universal() t_hpy_univ = bench(pnp_hpy_universal, N_SLEDS, N_TIME) print(f"HPy [Universal]: {t_hpy_univ:.3f} seconds", end=end) if norm: - print(f" ({t_hpy_univ/norm:.1f} * Julia)") + print(f" ({t_hpy_univ/norm:4.1f} * Julia)") if not IS_PYPY: import piconumpy._piconumpy_hpy as pnp_hpy @@ -132,7 +133,7 @@ def main(): print(f"HPy [CPy ABI]: {t_hpy_cpy_abi:.3f} seconds", end=end) if norm: - print(f" ({t_hpy_cpy_abi/norm:.1f} * Julia)") + print(f" ({t_hpy_cpy_abi/norm:4.1f} * Julia)") if IS_PYPY: import piconumpy.purepy as pnp_with_list diff --git a/bench/make_bench_piconumpy.py b/bench/make_bench_piconumpy.py index b02433f..4a76e9a 100644 --- a/bench/make_bench_piconumpy.py +++ b/bench/make_bench_piconumpy.py @@ -47,6 +47,7 @@ def create_tmp_file(name_module): import numpy as np from piconumpy import array from math import pi, cos, sin +from pprint import pprint IS_PYPY = hasattr(sys, 'pypy_version_info') """ @@ -65,6 +66,8 @@ def create_tmp_file(name_module): if not IS_PYPY: from tmp_hpy import bench as 
bench_hpy +pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")}) + # get norm from Julia benchmark with open("tmp_result_julia.txt") as file: norm = float(file.read()) diff --git a/bench/profile_piconumpy.py b/bench/profile_piconumpy.py index b7de388..3bde5ae 100644 --- a/bench/profile_piconumpy.py +++ b/bench/profile_piconumpy.py @@ -7,12 +7,14 @@ import tmp_purepy import tmp_purepy_array import tmp_cython +import tmp_hpy_universal methods = { "cpython-c-api": bench_array1d, "purepy": tmp_purepy, "purepy_array": tmp_purepy_array, "cython": tmp_cython, + "universal": tmp_hpy_universal, } module = methods.get(sys.argv[-1], bench_array1d) From 968cc4362682564b08c7a269425f17679006bcb3 Mon Sep 17 00:00:00 2001 From: paugier Date: Wed, 13 Oct 2021 22:35:18 +0200 Subject: [PATCH 09/32] microbench loop_sum --- bench/microbench_loop_sum/Makefile | 13 +++++++ bench/microbench_loop_sum/README.md | 35 +++++++++++++++++++ bench/microbench_loop_sum/bench.jl | 21 ++++++++++++ bench/microbench_loop_sum/bench.py | 53 +++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+) create mode 100644 bench/microbench_loop_sum/Makefile create mode 100644 bench/microbench_loop_sum/README.md create mode 100644 bench/microbench_loop_sum/bench.jl create mode 100644 bench/microbench_loop_sum/bench.py diff --git a/bench/microbench_loop_sum/Makefile b/bench/microbench_loop_sum/Makefile new file mode 100644 index 0000000..a4259be --- /dev/null +++ b/bench/microbench_loop_sum/Makefile @@ -0,0 +1,13 @@ + +all: tmp_result_julia.txt + @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + @python bench.py list + @python bench.py purepy + @python bench.py _piconumpy_hpy + @python bench.py _piconumpy_cpython_capi + +tmp_result_julia.txt: bench.jl + @julia bench.jl > tmp_result_julia.txt + +clean: + rm -f tmp_result_julia.txt \ No newline at end of file diff --git 
a/bench/microbench_loop_sum/README.md b/bench/microbench_loop_sum/README.md new file mode 100644 index 0000000..93aa9d6 --- /dev/null +++ b/bench/microbench_loop_sum/README.md @@ -0,0 +1,35 @@ +# Microbenchmark sum_loop + +We measure the performance for this function: + +```python +def sum_loop(arr): + result = 0.0 + for value in arr: + result += value + return result +``` + +One can run the benchmarks with `make`. + +With PyPy3.7, I get: + +``` +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=6, releaselevel='final', serial=0)} +list : 1.75e-05 s ( 1.6 * Julia) +purepy : 1.95e-05 s ( 1.8 * Julia) +_piconumpy_hpy : 2.18e-04 s ( 20.5 * Julia) +_piconumpy_cpython_capi : 1.19e-03 s (112.1 * Julia) +``` + +With CPython: + +``` +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=6, releaselevel='final', serial=0)} +list : 2.65e-04 s ( 25.0 * Julia) +purepy : 1.27e-03 s (120.0 * Julia) +_piconumpy_hpy : 4.24e-04 s ( 39.9 * Julia) +_piconumpy_cpython_capi : 3.50e-04 s ( 33.0 * Julia) +``` \ No newline at end of file diff --git a/bench/microbench_loop_sum/bench.jl b/bench/microbench_loop_sum/bench.jl new file mode 100644 index 0000000..440c755 --- /dev/null +++ b/bench/microbench_loop_sum/bench.jl @@ -0,0 +1,21 @@ +using Statistics + +function sum_loop(arr) + result = 0. 
+ for i in eachindex(arr) + result += arr[i] + end + return result +end + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed sum_loop(arr) +end + +println(median(times)) diff --git a/bench/microbench_loop_sum/bench.py b/bench/microbench_loop_sum/bench.py new file mode 100644 index 0000000..03f5040 --- /dev/null +++ b/bench/microbench_loop_sum/bench.py @@ -0,0 +1,53 @@ +import sys +from time import perf_counter +from pathlib import Path +from random import random + +import numpy as np + +tmp_result_julia = Path("tmp_result_julia.txt") +if tmp_result_julia.exists(): + with open("tmp_result_julia.txt") as file: + norm = float(file.read()) +else: + print("tmp_result_julia.txt does not exist. First execute with `make`") + +try: + method = sys.argv[1] +except IndexError: + method = "purepy" + + +def sum_loop(arr): + result = 0.0 + for value in arr: + result += value + return result + + +if method == "_piconumpy_hpy": + from piconumpy.util_hpy import import_ext + + ext = import_ext() + array = ext.array +elif method == "list": + array = list +else: + d = {} + exec(f"from piconumpy.{method} import array", d) + array = d["array"] + +# print(array) + +size = 10000 +times = [] +nb_runs = 200 +for _ in range(nb_runs): + data_as_list = [random() for _ in range(size)] + arr = array(data_as_list) + t_start = perf_counter() + sum_loop(arr) + times.append(perf_counter() - t_start) + +time = np.median(times) +print(f"{method:30s}: {time:.2e} s ({time / norm:5.1f} * Julia)") From 20fbba36a1f5bedcb8f21e64692025a7c71d9269 Mon Sep 17 00:00:00 2001 From: paugier Date: Wed, 13 Oct 2021 22:52:52 +0200 Subject: [PATCH 10/32] microbench_loop_sum: add numpy --- bench/microbench_loop_sum/Makefile | 1 + bench/microbench_loop_sum/README.md | 16 +++++++++------- bench/microbench_loop_sum/bench.py | 2 ++ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/bench/microbench_loop_sum/Makefile 
b/bench/microbench_loop_sum/Makefile index a4259be..a54982e 100644 --- a/bench/microbench_loop_sum/Makefile +++ b/bench/microbench_loop_sum/Makefile @@ -3,6 +3,7 @@ all: tmp_result_julia.txt @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" @python bench.py list @python bench.py purepy + @python bench.py numpy @python bench.py _piconumpy_hpy @python bench.py _piconumpy_cpython_capi diff --git a/bench/microbench_loop_sum/README.md b/bench/microbench_loop_sum/README.md index 93aa9d6..0b4cc89 100644 --- a/bench/microbench_loop_sum/README.md +++ b/bench/microbench_loop_sum/README.md @@ -17,9 +17,10 @@ With PyPy3.7, I get: ``` {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=6, releaselevel='final', serial=0)} -list : 1.75e-05 s ( 1.6 * Julia) -purepy : 1.95e-05 s ( 1.8 * Julia) -_piconumpy_hpy : 2.18e-04 s ( 20.5 * Julia) +list : 1.73e-05 s ( 1.6 * Julia) +purepy : 1.97e-05 s ( 1.9 * Julia) +numpy : 4.12e-03 s (388.6 * Julia) +_piconumpy_hpy : 2.14e-04 s ( 20.2 * Julia) _piconumpy_cpython_capi : 1.19e-03 s (112.1 * Julia) ``` @@ -28,8 +29,9 @@ With CPython: ``` {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=6, releaselevel='final', serial=0)} -list : 2.65e-04 s ( 25.0 * Julia) -purepy : 1.27e-03 s (120.0 * Julia) -_piconumpy_hpy : 4.24e-04 s ( 39.9 * Julia) -_piconumpy_cpython_capi : 3.50e-04 s ( 33.0 * Julia) +list : 2.62e-04 s ( 24.7 * Julia) +purepy : 1.25e-03 s (118.2 * Julia) +numpy : 8.66e-04 s ( 81.6 * Julia) +_piconumpy_hpy : 4.22e-04 s ( 39.8 * Julia) +_piconumpy_cpython_capi : 3.53e-04 s ( 33.3 * Julia) ``` \ No newline at end of file diff --git a/bench/microbench_loop_sum/bench.py b/bench/microbench_loop_sum/bench.py index 03f5040..db6e6fc 100644 --- a/bench/microbench_loop_sum/bench.py +++ b/bench/microbench_loop_sum/bench.py @@ -32,6 +32,8 @@ def sum_loop(arr): array = ext.array elif method == "list": array 
= list +elif method == "numpy": + array = np.array else: d = {} exec(f"from piconumpy.{method} import array", d) From 47ebd2e0cc4124cd108f1b387efd1f6766bccb17 Mon Sep 17 00:00:00 2001 From: paugier Date: Fri, 12 Nov 2021 15:04:29 +0100 Subject: [PATCH 11/32] Update bench + PyPy traces --- bench/microbench_loop_sum/Makefile | 7 +- bench/microbench_loop_sum/README.md | 166 ++++++++++++++++++++++++++-- bench/microbench_loop_sum/bench.py | 9 +- 3 files changed, 169 insertions(+), 13 deletions(-) diff --git a/bench/microbench_loop_sum/Makefile b/bench/microbench_loop_sum/Makefile index a54982e..e394385 100644 --- a/bench/microbench_loop_sum/Makefile +++ b/bench/microbench_loop_sum/Makefile @@ -11,4 +11,9 @@ tmp_result_julia.txt: bench.jl @julia bench.jl > tmp_result_julia.txt clean: - rm -f tmp_result_julia.txt \ No newline at end of file + rm -f tmp_*.txt + +produce_traces: tmp_result_julia.txt + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_list.txt pypy bench.py list + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_piconumpy_list.txt pypy bench.py purepy + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_piconumpy_hpy.txt pypy bench.py _piconumpy_hpy diff --git a/bench/microbench_loop_sum/README.md b/bench/microbench_loop_sum/README.md index 0b4cc89..dade95a 100644 --- a/bench/microbench_loop_sum/README.md +++ b/bench/microbench_loop_sum/README.md @@ -17,11 +17,11 @@ With PyPy3.7, I get: ``` {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=6, releaselevel='final', serial=0)} -list : 1.73e-05 s ( 1.6 * Julia) -purepy : 1.97e-05 s ( 1.9 * Julia) -numpy : 4.12e-03 s (388.6 * Julia) -_piconumpy_hpy : 2.14e-04 s ( 20.2 * Julia) -_piconumpy_cpython_capi : 1.19e-03 s (112.1 * Julia) +list : 1.34e-05 s ( 1.3 * Julia) +piconumpy.purepy : 1.33e-05 s ( 1.3 * Julia) +numpy : 4.00e-03 s (376.6 * Julia) +_piconumpy_hpy : 1.99e-04 s ( 18.8 * Julia) +_piconumpy_cpython_capi : 1.27e-03 s (119.5 * Julia) ``` 
With CPython: @@ -29,9 +29,153 @@ With CPython: ``` {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=6, releaselevel='final', serial=0)} -list : 2.62e-04 s ( 24.7 * Julia) -purepy : 1.25e-03 s (118.2 * Julia) -numpy : 8.66e-04 s ( 81.6 * Julia) -_piconumpy_hpy : 4.22e-04 s ( 39.8 * Julia) -_piconumpy_cpython_capi : 3.53e-04 s ( 33.3 * Julia) -``` \ No newline at end of file +list : 2.62e-04 s ( 24.6 * Julia) +piconumpy.purepy : 1.25e-03 s (117.5 * Julia) +numpy : 7.35e-04 s ( 69.2 * Julia) +_piconumpy_hpy : 4.26e-04 s ( 40.2 * Julia) +_piconumpy_cpython_capi : 3.52e-04 s ( 33.1 * Julia) +``` + +- PyPy is fast with list (1.3 * Julia, same order of magnitude that with Julia) +and as fast for a piconumpy array based on a list ("piconumpy.purepy", zero +cost abstraction!) + +- Numpy and _piconumpy_cpython_capi are both much slower with PyPy than with +Cpython. We can guess that the Numpy port to HPy would fix that. + +- piconumpy_hpy is a bit faster with PyPy (19 * Julia) than with CPython (40 * +Julia), however, we see that PyPy does not strongly accelerate piconumpy_hpy +(19 * Julia, 14 * piconumpy_list). 
+ +## Traces PyPy `sum_loop` + +### List + +``` ++557: label(p0, p1, p6, p9, f35, f30, p15, p22, p26, i32, i27, p29, descr=TargetToken(140447503809120)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++606: i44 = uint_ge(i32, i27) +guard_false(i44, descr=) [p0, p6, p9, p15, p1, i32, i27, i44, p26, f30, f35] ++615: f45 = getarrayitem_gc_f(p29, i32, descr=) ++622: i47 = int_add(i32, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++626: f48 = float_add(f35, f45) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++630: setfield_gc(p15, i47, descr=) ++634: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, f45, f48, None, None] ++634: i51 = getfield_raw_i(140447672379264, descr=) ++647: i53 = int_sub(i51, 1) ++651: setfield_raw(140447672379264, i53, descr=) ++654: i56 = int_lt(i53, 0) ++658: guard_false(i56, descr=) [p0, p6, p9, p15, p1, i53, f45, f48, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++664: i57 = arraylen_gc(p29, descr=) ++664: jump(p0, p1, p6, p9, f48, f45, p15, p22, p26, i47, i27, p29, descr=TargetToken(140447503809120)) +``` + +### piconumpy purepy (based on list) + +``` ++705: label(p0, p1, p6, p9, f53, f46, p15, p22, i49, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++760: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53] ++760: p62 = force_token() ++760: enter_portal_frame(21, 28364) +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#0 LOAD_FAST') +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#2 LOAD_ATTR') 
+debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#4 LOAD_FAST') +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#6 BINARY_SUBSCR') ++760: i65 = uint_ge(i49, i43) ++763: guard_false(i65, descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53] ++769: f66 = getarrayitem_gc_f(p45, i49, descr=) +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#8 RETURN_VALUE') ++776: leave_portal_frame(21) ++776: i69 = int_add(i49, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++780: f70 = float_add(f53, f66) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++784: i72 = getfield_raw_i(139748871243648, descr=) ++797: i74 = int_sub(i72, 3) ++801: setfield_raw(139748871243648, i74, descr=) ++804: setfield_gc(p15, i69, descr=) ++808: i77 = int_lt(i74, 0) ++812: guard_false(i77, descr=) [p0, p6, p9, p15, p1, i74, f66, f70, None, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++818: i78 = arraylen_gc(p45, descr=) ++818: jump(p0, p1, p6, p9, f70, f66, p15, p22, i69, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776)) +``` + +### piconumpy hpy + +``` ++1339: label(p0, p1, p6, p9, f73, p63, p15, i68, p62, descr=TargetToken(139865876151520)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++1352: p82 = getfield_gc_r(p15, descr=) ++1356: guard_nonnull_class(p82, 139866025815200, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1376: p84 = getfield_gc_r(p82, descr=) ++1387: guard_value(p84, ConstPtr(ptr85), descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1396: guard_not_invalidated(descr=) [p0, p6, p9, p63, p15, p1, p82, 
f73] ++1403: p87 = getfield_gc_r(ConstPtr(ptr86), descr=) ++1414: guard_value(p87, ConstPtr(ptr88), descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1423: i90 = getfield_gc_i(ConstPtr(ptr89), descr=) ++1427: i92 = int_lt(i68, 0) ++1431: guard_false(i92, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1444: i94 = getfield_gc_i(ConstPtr(ptr93), descr=) ++1448: i95 = int_is_zero(i94) ++1451: guard_false(i95, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1457: i97 = int_sub(i94, 1) ++1461: p99 = getfield_gc_r(ConstPtr(ptr98), descr=) ++1465: i100 = getarrayitem_gc_i(p99, i97, descr=) ++1470: i101 = arraylen_gc(p99, descr=) ++1474: i103 = int_rshift(i101, 1) ++1477: i105 = int_sub(i103, 5) ++1481: i106 = int_lt(i97, i105) ++1484: cond_call(i106, ConstClass(_ll_list_resize_hint_really_look_inside_iff__listPtr_Signed_Bool), ConstPtr(ptr108), i97, 0, descr=) ++1490: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, i97, f73] ++1490: setfield_gc(ConstPtr(ptr110), i97, descr=) ++1494: i112 = int_lt(i100, 0) ++1498: guard_false(i112, descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, f73] ++1522: setarrayitem_gc(p62, i100, p82, descr=) ++1527: p113 = force_token() ++1548: setfield_gc(p0, p113, descr=) ++1552: i115 = call_may_force_i(i90, 139866044538144, i100, i68, descr=) ++1663: guard_not_forced(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73] ++1674: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73] ++1688: call_n(ConstClass(close), i100, descr=) ++1754: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73] ++1768: i117 = int_is_true(i115) ++1771: guard_true(i117, descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73] ++1784: p119 = getfield_gc_r(ConstPtr(ptr118), descr=) ++1788: p120 = getarrayitem_gc_r(p119, i115, descr=) ++1793: call_n(ConstClass(close), i115, descr=) ++1866: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73] ++1880: 
guard_nonnull_class(p120, ConstClass(W_FloatObject), descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73] ++1907: i123 = getfield_gc_i(p15, descr=) ++1918: i125 = int_add(i123, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++1923: setfield_gc(p15, i125, descr=) ++1927: f126 = getfield_gc_f(p120, descr=) ++1933: f127 = float_add(f73, f126) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++1947: guard_not_invalidated(descr=) [p0, p6, p9, p120, p15, p1, f127, None, None, None] ++1947: i129 = getfield_raw_i(139866044675968, descr=) ++1960: i131 = int_sub(i129, 3) ++1964: setfield_raw(139866044675968, i131, descr=) ++1967: i134 = int_lt(i131, 0) ++1971: guard_false(i134, descr=) [p0, p6, p9, p120, p15, p1, i131, f127, None, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++1977: i135 = arraylen_gc(p119, descr=) ++1977: jump(p0, p1, p6, p9, f127, p120, p15, i125, p119, descr=TargetToken(139865876151520)) +``` diff --git a/bench/microbench_loop_sum/bench.py b/bench/microbench_loop_sum/bench.py index db6e6fc..51c725d 100644 --- a/bench/microbench_loop_sum/bench.py +++ b/bench/microbench_loop_sum/bench.py @@ -5,6 +5,8 @@ import numpy as np +on_pypy = sys.implementation.name == 'pypy' + tmp_result_julia = Path("tmp_result_julia.txt") if tmp_result_julia.exists(): with open("tmp_result_julia.txt") as file: @@ -24,6 +26,10 @@ def sum_loop(arr): result += value return result +nb_runs = 500 +if on_pypy and method in ["list", "_piconumpy_hpy", "purepy"]: + nb_runs = 5000 + if method == "_piconumpy_hpy": from piconumpy.util_hpy import import_ext @@ -38,12 +44,13 @@ def sum_loop(arr): d = {} exec(f"from piconumpy.{method} import array", d) 
array = d["array"] + if "piconumpy" not in method: + method = f"piconumpy.{method}" # print(array) size = 10000 times = [] -nb_runs = 200 for _ in range(nb_runs): data_as_list = [random() for _ in range(size)] arr = array(data_as_list) From 431abefb83852446241e4f44fbed45ef2d62445f Mon Sep 17 00:00:00 2001 From: paugier Date: Wed, 8 Dec 2021 17:19:40 +0100 Subject: [PATCH 12/32] Add more simple low level benchmarks --- Makefile | 1 + bench/microbench_loop_sum/Makefile | 39 ++++++++++- bench/microbench_loop_sum/bench.jl | 4 +- bench/microbench_loop_sum/bench.py | 66 ++++++++++++++++--- bench/microbench_loop_sum/bench_cort.jl | 35 ++++++++++ bench/microbench_loop_sum/bench_init_zeros.jl | 21 ++++++ .../bench_sum_loop_index.jl | 23 +++++++ 7 files changed, 177 insertions(+), 12 deletions(-) create mode 100644 bench/microbench_loop_sum/bench_cort.jl create mode 100644 bench/microbench_loop_sum/bench_init_zeros.jl create mode 100644 bench/microbench_loop_sum/bench_sum_loop_index.jl diff --git a/Makefile b/Makefile index f7f7c54..3ed0ded 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ build_ext_universal: build_ext: $(PYTHON) setup.py build_ext -if + rm -f piconumpy/_piconumpy_hpy.py full: $(PYTHON) -m pip install -e .[full] diff --git a/bench/microbench_loop_sum/Makefile b/bench/microbench_loop_sum/Makefile index e394385..c8f534d 100644 --- a/bench/microbench_loop_sum/Makefile +++ b/bench/microbench_loop_sum/Makefile @@ -1,5 +1,5 @@ -all: tmp_result_julia.txt +bench: tmp_julia_sum_loop.txt @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" @python bench.py list @python bench.py purepy @@ -7,8 +7,41 @@ all: tmp_result_julia.txt @python bench.py _piconumpy_hpy @python bench.py _piconumpy_cpython_capi -tmp_result_julia.txt: bench.jl - @julia bench.jl > tmp_result_julia.txt +bench_sum_loop_index: tmp_julia_sum_loop_index.txt + @python -c "import sys; from pprint import pprint as p; 
p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + @python bench.py list sum_loop_index + @python bench.py purepy sum_loop_index + @python bench.py numpy sum_loop_index + @python bench.py _piconumpy_hpy sum_loop_index + @python bench.py _piconumpy_cpython_capi sum_loop_index + +bench_cort: tmp_julia_cort.txt + @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + @python bench.py list cort + @python bench.py purepy cort + @python bench.py numpy cort + @python bench.py _piconumpy_hpy cort + @python bench.py _piconumpy_cpython_capi cort + +bench_init_zeros: tmp_julia_init_zeros.txt + @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + @python bench.py list init_zeros + @python bench.py purepy init_zeros + @python bench.py numpy init_zeros + @python bench.py _piconumpy_hpy init_zeros + @python bench.py _piconumpy_cpython_capi init_zeros + +tmp_julia_sum_loop.txt: bench.jl + @julia bench.jl > tmp_julia_sum_loop.txt + +tmp_julia_sum_loop_index.txt: bench_sum_loop_index.jl + @julia bench_sum_loop_index.jl > tmp_julia_sum_loop_index.txt + +tmp_julia_cort.txt: bench_cort.jl + @julia bench_cort.jl > tmp_julia_cort.txt + +tmp_julia_init_zeros.txt: bench_init_zeros.jl + @julia bench_init_zeros.jl > tmp_julia_init_zeros.txt clean: rm -f tmp_*.txt diff --git a/bench/microbench_loop_sum/bench.jl b/bench/microbench_loop_sum/bench.jl index 440c755..5c38b52 100644 --- a/bench/microbench_loop_sum/bench.jl +++ b/bench/microbench_loop_sum/bench.jl @@ -8,6 +8,8 @@ function sum_loop(arr) return result end +compute_from_arr = sum_loop + size = 10000 nb_runs = 200 @@ -15,7 +17,7 @@ times = zeros(nb_runs) for irun in 1:nb_runs arr = rand(size) - times[irun] = @elapsed sum_loop(arr) + times[irun] = @elapsed compute_from_arr(arr) end println(median(times)) diff --git 
a/bench/microbench_loop_sum/bench.py b/bench/microbench_loop_sum/bench.py index 51c725d..321fe5e 100644 --- a/bench/microbench_loop_sum/bench.py +++ b/bench/microbench_loop_sum/bench.py @@ -2,23 +2,31 @@ from time import perf_counter from pathlib import Path from random import random +from math import sqrt import numpy as np on_pypy = sys.implementation.name == 'pypy' -tmp_result_julia = Path("tmp_result_julia.txt") -if tmp_result_julia.exists(): - with open("tmp_result_julia.txt") as file: - norm = float(file.read()) -else: - print("tmp_result_julia.txt does not exist. First execute with `make`") - try: method = sys.argv[1] except IndexError: method = "purepy" +try: + name_bench = sys.argv[2] +except IndexError: + name_bench = "sum_loop" + + +tmp_result_julia = Path(f"tmp_julia_{name_bench}.txt") +if tmp_result_julia.exists(): + with open(tmp_result_julia) as file: + norm = float(file.read()) +else: + print(f"{tmp_result_julia} does not exist. First execute with `make`") + + def sum_loop(arr): result = 0.0 @@ -31,6 +39,38 @@ def sum_loop(arr): nb_runs = 5000 +def sum_loop_index(arr): + result = 0.0 + for index in range(500): + result += arr[index] + return result + + +def init_zeros(arr): + for index in range(len(arr)): + arr[index] = 0.0 + + +def _cort(s1, s2): + num = 0.0 + sum_square_x = 0.0 + sum_square_y = 0.0 + for t in range(len(s1) - 1): + slope_1 = s1[t + 1] - s1[t] + slope_2 = s2[t + 1] - s2[t] + num += slope_1 * slope_2 + sum_square_x += slope_1 * slope_1 + sum_square_y += slope_2 * slope_2 + return num / (sqrt(sum_square_x * sum_square_y)) + + +def cort(arr): + return _cort(arr, arr) + + +compute_from_arr = locals()[name_bench] + + if method == "_piconumpy_hpy": from piconumpy.util_hpy import import_ext @@ -50,12 +90,22 @@ def sum_loop(arr): # print(array) size = 10000 + +# warming during ~ 1s +data_as_list = [random() for _ in range(size)] +arr = array(data_as_list) +t_start = perf_counter() +compute_from_arr(arr) +t_first = perf_counter() - t_start 
+for _ in range(round(1 / t_first)): + compute_from_arr(arr) + times = [] for _ in range(nb_runs): data_as_list = [random() for _ in range(size)] arr = array(data_as_list) t_start = perf_counter() - sum_loop(arr) + compute_from_arr(arr) times.append(perf_counter() - t_start) time = np.median(times) diff --git a/bench/microbench_loop_sum/bench_cort.jl b/bench/microbench_loop_sum/bench_cort.jl new file mode 100644 index 0000000..a816541 --- /dev/null +++ b/bench/microbench_loop_sum/bench_cort.jl @@ -0,0 +1,35 @@ +using Statistics + + +function cort(s1, s2) + num = 0.0 + sum_square_x = 0.0 + sum_square_y = 0.0 + for t in 1:length(s1)-1 + slope_1 = s1[t + 1] - s1[t] + slope_2 = s2[t + 1] - s2[t] + num += slope_1 * slope_2 + sum_square_x += slope_1 * slope_1 + sum_square_y += slope_2 * slope_2 + end + return num / (sqrt(sum_square_x * sum_square_y)) +end + +function use_cort(arr) + return cort(arr, arr) +end + + +compute_from_arr = use_cort + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_loop_sum/bench_init_zeros.jl b/bench/microbench_loop_sum/bench_init_zeros.jl new file mode 100644 index 0000000..b6035e5 --- /dev/null +++ b/bench/microbench_loop_sum/bench_init_zeros.jl @@ -0,0 +1,21 @@ +using Statistics + +function init_zeros(arr) + for i in eachindex(arr) + arr[i] = 0.0 + end +end + +compute_from_arr = init_zeros + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_loop_sum/bench_sum_loop_index.jl b/bench/microbench_loop_sum/bench_sum_loop_index.jl new file mode 100644 index 0000000..458e6c5 --- /dev/null +++ b/bench/microbench_loop_sum/bench_sum_loop_index.jl @@ -0,0 +1,23 @@ +using Statistics + +function sum_loop_index(arr) + result = 0. 
+ for i = 1:500 + result += arr[i] + end + return result +end + +compute_from_arr = sum_loop_index + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) From b0143143213ba9967b78b547fe4ffef5216c495e Mon Sep 17 00:00:00 2001 From: paugier Date: Wed, 8 Dec 2021 17:21:15 +0100 Subject: [PATCH 13/32] New dir microbench_low_level --- bench/{microbench_loop_sum => microbench_low_level}/Makefile | 0 bench/{microbench_loop_sum => microbench_low_level}/README.md | 0 bench/{microbench_loop_sum => microbench_low_level}/bench.jl | 0 bench/{microbench_loop_sum => microbench_low_level}/bench.py | 0 bench/{microbench_loop_sum => microbench_low_level}/bench_cort.jl | 0 .../bench_init_zeros.jl | 0 .../bench_sum_loop_index.jl | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename bench/{microbench_loop_sum => microbench_low_level}/Makefile (100%) rename bench/{microbench_loop_sum => microbench_low_level}/README.md (100%) rename bench/{microbench_loop_sum => microbench_low_level}/bench.jl (100%) rename bench/{microbench_loop_sum => microbench_low_level}/bench.py (100%) rename bench/{microbench_loop_sum => microbench_low_level}/bench_cort.jl (100%) rename bench/{microbench_loop_sum => microbench_low_level}/bench_init_zeros.jl (100%) rename bench/{microbench_loop_sum => microbench_low_level}/bench_sum_loop_index.jl (100%) diff --git a/bench/microbench_loop_sum/Makefile b/bench/microbench_low_level/Makefile similarity index 100% rename from bench/microbench_loop_sum/Makefile rename to bench/microbench_low_level/Makefile diff --git a/bench/microbench_loop_sum/README.md b/bench/microbench_low_level/README.md similarity index 100% rename from bench/microbench_loop_sum/README.md rename to bench/microbench_low_level/README.md diff --git a/bench/microbench_loop_sum/bench.jl b/bench/microbench_low_level/bench.jl similarity index 100% rename from 
bench/microbench_loop_sum/bench.jl rename to bench/microbench_low_level/bench.jl diff --git a/bench/microbench_loop_sum/bench.py b/bench/microbench_low_level/bench.py similarity index 100% rename from bench/microbench_loop_sum/bench.py rename to bench/microbench_low_level/bench.py diff --git a/bench/microbench_loop_sum/bench_cort.jl b/bench/microbench_low_level/bench_cort.jl similarity index 100% rename from bench/microbench_loop_sum/bench_cort.jl rename to bench/microbench_low_level/bench_cort.jl diff --git a/bench/microbench_loop_sum/bench_init_zeros.jl b/bench/microbench_low_level/bench_init_zeros.jl similarity index 100% rename from bench/microbench_loop_sum/bench_init_zeros.jl rename to bench/microbench_low_level/bench_init_zeros.jl diff --git a/bench/microbench_loop_sum/bench_sum_loop_index.jl b/bench/microbench_low_level/bench_sum_loop_index.jl similarity index 100% rename from bench/microbench_loop_sum/bench_sum_loop_index.jl rename to bench/microbench_low_level/bench_sum_loop_index.jl From 01effe959c449ee673f79c202a5be65098e8e5d1 Mon Sep 17 00:00:00 2001 From: paugier Date: Wed, 8 Dec 2021 22:02:44 +0100 Subject: [PATCH 14/32] Improve output benchmarks --- bench/microbench_low_level/Makefile | 15 +++++-- bench/microbench_low_level/README.md | 11 ++++- bench/microbench_low_level/bench.py | 9 +--- .../bench_sum_loop_index.jl | 2 +- bench/microbench_low_level/result_sum_loop.md | 41 +++++++++++++++++++ 5 files changed, 63 insertions(+), 15 deletions(-) create mode 100644 bench/microbench_low_level/result_sum_loop.md diff --git a/bench/microbench_low_level/Makefile b/bench/microbench_low_level/Makefile index c8f534d..0dba15a 100644 --- a/bench/microbench_low_level/Makefile +++ b/bench/microbench_low_level/Makefile @@ -1,6 +1,8 @@ +.PHONY : clean print_info bench bench_sum_loop_index bench_cort bench_init_zeros + bench: tmp_julia_sum_loop.txt - @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in 
('cache_tag', 'version')})" + @$(MAKE) --no-print-directory print_info NAME_BENCH=sum_loop @python bench.py list @python bench.py purepy @python bench.py numpy @@ -8,7 +10,7 @@ bench: tmp_julia_sum_loop.txt @python bench.py _piconumpy_cpython_capi bench_sum_loop_index: tmp_julia_sum_loop_index.txt - @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + @$(MAKE) --no-print-directory print_info NAME_BENCH=sum_loop_index @python bench.py list sum_loop_index @python bench.py purepy sum_loop_index @python bench.py numpy sum_loop_index @@ -16,7 +18,7 @@ bench_sum_loop_index: tmp_julia_sum_loop_index.txt @python bench.py _piconumpy_cpython_capi sum_loop_index bench_cort: tmp_julia_cort.txt - @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + @$(MAKE) --no-print-directory print_info NAME_BENCH=cort @python bench.py list cort @python bench.py purepy cort @python bench.py numpy cort @@ -24,13 +26,18 @@ bench_cort: tmp_julia_cort.txt @python bench.py _piconumpy_cpython_capi cort bench_init_zeros: tmp_julia_init_zeros.txt - @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + @$(MAKE) --no-print-directory print_info NAME_BENCH=init_zeros @python bench.py list init_zeros @python bench.py purepy init_zeros @python bench.py numpy init_zeros @python bench.py _piconumpy_hpy init_zeros @python bench.py _piconumpy_cpython_capi init_zeros +print_info: + @echo bench $(NAME_BENCH) + @python -c "from socket import gethostname as f; print('hostname:', f())" + @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + tmp_julia_sum_loop.txt: bench.jl @julia bench.jl > tmp_julia_sum_loop.txt diff --git a/bench/microbench_low_level/README.md 
b/bench/microbench_low_level/README.md index dade95a..902b1dd 100644 --- a/bench/microbench_low_level/README.md +++ b/bench/microbench_low_level/README.md @@ -1,6 +1,13 @@ -# Microbenchmark sum_loop +# Microbenchmarks low level Python code -We measure the performance for this function: +We measure the performance for functions containing low level Python code. + +- `sum_loop`: `for value in arr` and summation +- `sum_loop_sum_loop_index`: `for index in range(5000)` +- `init_zeros`: set values to zeros +- `cort`: normalized cosine similarity measure between derivatives + +We measure the performance for functions containing low level Python code. ```python def sum_loop(arr): diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index 321fe5e..5813d6b 100644 --- a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -6,8 +6,6 @@ import numpy as np -on_pypy = sys.implementation.name == 'pypy' - try: method = sys.argv[1] except IndexError: @@ -27,21 +25,16 @@ print(f"{tmp_result_julia} does not exist. First execute with `make`") - def sum_loop(arr): result = 0.0 for value in arr: result += value return result -nb_runs = 500 -if on_pypy and method in ["list", "_piconumpy_hpy", "purepy"]: - nb_runs = 5000 - def sum_loop_index(arr): result = 0.0 - for index in range(500): + for index in range(5000): result += arr[index] return result diff --git a/bench/microbench_low_level/bench_sum_loop_index.jl b/bench/microbench_low_level/bench_sum_loop_index.jl index 458e6c5..b4c682c 100644 --- a/bench/microbench_low_level/bench_sum_loop_index.jl +++ b/bench/microbench_low_level/bench_sum_loop_index.jl @@ -2,7 +2,7 @@ using Statistics function sum_loop_index(arr) result = 0. 
- for i = 1:500 + for i = 1:5000 result += arr[i] end return result diff --git a/bench/microbench_low_level/result_sum_loop.md b/bench/microbench_low_level/result_sum_loop.md new file mode 100644 index 0000000..fd649cb --- /dev/null +++ b/bench/microbench_low_level/result_sum_loop.md @@ -0,0 +1,41 @@ +# Microbenchmark sum_loop + +We measure the performance for this function: + +```python +def sum_loop(arr): + result = 0.0 + for value in arr: + result += value + return result +``` + +One can run the benchmarks with `make`. + +With PyPy3.7, I get: + +``` +bench sum_loop +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 2.34e-05 s ( 1.7 * Julia) +purepy : 2.41e-05 s ( 1.8 * Julia) +numpy : 8.91e-03 s (654.4 * Julia) +_piconumpy_hpy : 3.37e-04 s ( 24.8 * Julia) +_piconumpy_cpython_capi : 2.04e-03 s (150.1 * Julia) +``` + +With CPython: + +``` +bench sum_loop +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 3.59e-04 s ( 26.4 * Julia) +purepy : 2.18e-03 s (160.3 * Julia) +numpy : 1.12e-03 s ( 82.5 * Julia) +_piconumpy_hpy : 6.30e-04 s ( 46.2 * Julia) +_piconumpy_cpython_capi : 5.04e-04 s ( 37.0 * Julia) +``` From b653dfd3b4f5257ccb2ea807dd23faa6afd0d4c9 Mon Sep 17 00:00:00 2001 From: paugier Date: Thu, 9 Dec 2021 10:34:47 +0100 Subject: [PATCH 15/32] Clean up Makefile + result files --- bench/microbench_low_level/Makefile | 61 +++++++------------ bench/microbench_low_level/README.md | 15 +++-- .../{bench.jl => bench_sum_loop.jl} | 0 bench/microbench_low_level/result_cort.md | 50 +++++++++++++++ .../microbench_low_level/result_init_zeros.md | 39 ++++++++++++ bench/microbench_low_level/result_sum_loop.md | 2 +- .../result_sum_loop_index.md | 41 +++++++++++++ 7 files changed, 165 insertions(+), 43 deletions(-) rename bench/microbench_low_level/{bench.jl => bench_sum_loop.jl} (100%) 
create mode 100644 bench/microbench_low_level/result_cort.md create mode 100644 bench/microbench_low_level/result_init_zeros.md create mode 100644 bench/microbench_low_level/result_sum_loop_index.md diff --git a/bench/microbench_low_level/Makefile b/bench/microbench_low_level/Makefile index 0dba15a..e9f350e 100644 --- a/bench/microbench_low_level/Makefile +++ b/bench/microbench_low_level/Makefile @@ -1,45 +1,30 @@ -.PHONY : clean print_info bench bench_sum_loop_index bench_cort bench_init_zeros - -bench: tmp_julia_sum_loop.txt - @$(MAKE) --no-print-directory print_info NAME_BENCH=sum_loop - @python bench.py list - @python bench.py purepy - @python bench.py numpy - @python bench.py _piconumpy_hpy - @python bench.py _piconumpy_cpython_capi - -bench_sum_loop_index: tmp_julia_sum_loop_index.txt - @$(MAKE) --no-print-directory print_info NAME_BENCH=sum_loop_index - @python bench.py list sum_loop_index - @python bench.py purepy sum_loop_index - @python bench.py numpy sum_loop_index - @python bench.py _piconumpy_hpy sum_loop_index - @python bench.py _piconumpy_cpython_capi sum_loop_index - -bench_cort: tmp_julia_cort.txt - @$(MAKE) --no-print-directory print_info NAME_BENCH=cort - @python bench.py list cort - @python bench.py purepy cort - @python bench.py numpy cort - @python bench.py _piconumpy_hpy cort - @python bench.py _piconumpy_cpython_capi cort - -bench_init_zeros: tmp_julia_init_zeros.txt - @$(MAKE) --no-print-directory print_info NAME_BENCH=init_zeros - @python bench.py list init_zeros - @python bench.py purepy init_zeros - @python bench.py numpy init_zeros - @python bench.py _piconumpy_hpy init_zeros - @python bench.py _piconumpy_cpython_capi init_zeros - -print_info: +.PHONY : clean bench_sum_loop bench_sum_loop_index bench_cort bench_init_zeros + +bench_sum_loop: NAME_BENCH=sum_loop +bench_sum_loop: tmp_julia_sum_loop.txt _bench + +bench_sum_loop_index: NAME_BENCH=sum_loop_index +bench_sum_loop_index: tmp_julia_sum_loop_index.txt _bench + +bench_cort: 
NAME_BENCH=cort +bench_cort: tmp_julia_cort.txt _bench + +bench_init_zeros: NAME_BENCH=init_zeros +bench_init_zeros: tmp_julia_init_zeros.txt _bench + +_bench: @echo bench $(NAME_BENCH) @python -c "from socket import gethostname as f; print('hostname:', f())" @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" - -tmp_julia_sum_loop.txt: bench.jl - @julia bench.jl > tmp_julia_sum_loop.txt + @python bench.py list $(NAME_BENCH) + @python bench.py purepy $(NAME_BENCH) + @python bench.py numpy $(NAME_BENCH) + @python bench.py _piconumpy_hpy $(NAME_BENCH) + @python bench.py _piconumpy_cpython_capi $(NAME_BENCH) + +tmp_julia_sum_loop.txt: bench_sum_loop.jl + @julia bench_sum_loop.jl > tmp_julia_sum_loop.txt tmp_julia_sum_loop_index.txt: bench_sum_loop_index.jl @julia bench_sum_loop_index.jl > tmp_julia_sum_loop_index.txt diff --git a/bench/microbench_low_level/README.md b/bench/microbench_low_level/README.md index 902b1dd..0f1e32f 100644 --- a/bench/microbench_low_level/README.md +++ b/bench/microbench_low_level/README.md @@ -2,10 +2,17 @@ We measure the performance for functions containing low level Python code. -- `sum_loop`: `for value in arr` and summation -- `sum_loop_sum_loop_index`: `for index in range(5000)` -- `init_zeros`: set values to zeros -- `cort`: normalized cosine similarity measure between derivatives +- `sum_loop` (command `make bench`): `for value in arr` and summation + +- `sum_loop_index` (command `make bench_sum_loop_index`): + `for index in range(5000)` and summation + +- `init_zeros` (command `make bench_init_zeros`): set values to zeros + +- `cort` (command `make bench_cort`): normalized cosine similarity measure + between derivatives + +The files result_*.txt contain few results. We measure the performance for functions containing low level Python code. 
diff --git a/bench/microbench_low_level/bench.jl b/bench/microbench_low_level/bench_sum_loop.jl similarity index 100% rename from bench/microbench_low_level/bench.jl rename to bench/microbench_low_level/bench_sum_loop.jl diff --git a/bench/microbench_low_level/result_cort.md b/bench/microbench_low_level/result_cort.md new file mode 100644 index 0000000..a1b5a22 --- /dev/null +++ b/bench/microbench_low_level/result_cort.md @@ -0,0 +1,50 @@ +# Microbenchmark cort + +We measure the performance for this function: + +```python +def cort(arr): + return _cort(arr, arr) + +def _cort(s1, s2): + num = 0.0 + sum_square_x = 0.0 + sum_square_y = 0.0 + for t in range(len(s1) - 1): + slope_1 = s1[t + 1] - s1[t] + slope_2 = s2[t + 1] - s2[t] + num += slope_1 * slope_2 + sum_square_x += slope_1 * slope_1 + sum_square_y += slope_2 * slope_2 + return num / (sqrt(sum_square_x * sum_square_y)) +``` + +One can run the benchmarks with `make bench_cort`. + +With PyPy3.7, I get: + +``` +bench cort +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 4.29e-05 s ( 1.8 * Julia) +purepy : 3.89e-05 s ( 1.6 * Julia) +numpy : 4.72e-02 s (1957.9 * Julia) +_piconumpy_hpy : 1.36e-03 s ( 56.5 * Julia) +_piconumpy_cpython_capi : 8.03e-03 s (332.8 * Julia) +``` + +With CPython: + +``` +bench cort +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 4.47e-03 s (185.4 * Julia) +purepy : 1.08e-02 s (448.6 * Julia) +numpy : 9.69e-03 s (401.6 * Julia) +_piconumpy_hpy : 5.32e-03 s (220.5 * Julia) +_piconumpy_cpython_capi : 4.80e-03 s (198.9 * Julia) +``` diff --git a/bench/microbench_low_level/result_init_zeros.md b/bench/microbench_low_level/result_init_zeros.md new file mode 100644 index 0000000..1228d63 --- /dev/null +++ b/bench/microbench_low_level/result_init_zeros.md @@ -0,0 +1,39 @@ +# Microbenchmark sum_init_zeros + 
+We measure the performance for this function: + +```python +def init_zeros(arr): + for index in range(len(arr)): + arr[index] = 0.0 +``` + +One can run the benchmarks with `make bench_init_zeros`. + +With PyPy3.7, I get: + +``` +bench init_zeros +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 2.53e-05 s ( 5.2 * Julia) +purepy : 2.65e-05 s ( 5.4 * Julia) +numpy : 9.01e-03 s (1848.7 * Julia) +_piconumpy_hpy : 4.17e-04 s ( 85.6 * Julia) +_piconumpy_cpython_capi : 1.10e-03 s (224.9 * Julia) +``` + +With CPython: + +``` +bench init_zeros +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 5.18e-04 s (106.3 * Julia) +purepy : 1.94e-03 s (397.8 * Julia) +numpy : 1.17e-03 s (239.3 * Julia) +_piconumpy_hpy : 6.46e-04 s (132.5 * Julia) +_piconumpy_cpython_capi : 5.43e-04 s (111.4 * Julia) +``` diff --git a/bench/microbench_low_level/result_sum_loop.md b/bench/microbench_low_level/result_sum_loop.md index fd649cb..556506c 100644 --- a/bench/microbench_low_level/result_sum_loop.md +++ b/bench/microbench_low_level/result_sum_loop.md @@ -10,7 +10,7 @@ def sum_loop(arr): return result ``` -One can run the benchmarks with `make`. +One can run the benchmarks with `make bench_sum_loop`. With PyPy3.7, I get: diff --git a/bench/microbench_low_level/result_sum_loop_index.md b/bench/microbench_low_level/result_sum_loop_index.md new file mode 100644 index 0000000..65a11c2 --- /dev/null +++ b/bench/microbench_low_level/result_sum_loop_index.md @@ -0,0 +1,41 @@ +# Microbenchmark sum_loop_index + +We measure the performance for this function: + +```python +def sum_loop_index(arr): + result = 0.0 + for index in range(5000): + result += arr[index] + return result +``` + +One can run the benchmarks with `make bench_sum_loop_index`. 
+ +With PyPy3.7, I get: + +``` +bench sum_loop_index +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 1.19e-05 s ( 2.0 * Julia) +purepy : 1.20e-05 s ( 2.1 * Julia) +numpy : 4.07e-03 s (692.9 * Julia) +_piconumpy_hpy : 1.65e-04 s ( 28.2 * Julia) +_piconumpy_cpython_capi : 9.95e-04 s (169.5 * Julia) +``` + +With CPython: + +``` +bench sum_loop_index +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 3.91e-04 s ( 66.6 * Julia) +purepy : 1.10e-03 s (186.9 * Julia) +numpy : 8.95e-04 s (152.5 * Julia) +_piconumpy_hpy : 4.85e-04 s ( 82.5 * Julia) +_piconumpy_cpython_capi : 4.15e-04 s ( 70.6 * Julia) +``` From 655e4925869d7027d3dc822404592b27f53682df Mon Sep 17 00:00:00 2001 From: paugier Date: Thu, 9 Dec 2021 14:49:36 +0100 Subject: [PATCH 16/32] Update results with GraalPython --- bench/microbench_low_level/bench.py | 26 +++++++++----- bench/microbench_low_level/result_cort.md | 32 ++++++++++++----- .../microbench_low_level/result_init_zeros.md | 32 ++++++++++++----- bench/microbench_low_level/result_sum_loop.md | 35 +++++++++++++------ .../result_sum_loop_index.md | 32 ++++++++++++----- 5 files changed, 111 insertions(+), 46 deletions(-) diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index 5813d6b..8524fd5 100644 --- a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -4,8 +4,6 @@ from random import random from math import sqrt -import numpy as np - try: method = sys.argv[1] except IndexError: @@ -72,6 +70,13 @@ def cort(arr): elif method == "list": array = list elif method == "numpy": + + try: + import numpy as np + except ImportError: + print(f"{method:30s}: ImportError numpy") + sys.exit(0) + array = np.array else: d = {} @@ -80,26 +85,29 @@ def cort(arr): if "piconumpy" not in method: method = 
f"piconumpy.{method}" -# print(array) - size = 10000 # warming during ~ 1s data_as_list = [random() for _ in range(size)] arr = array(data_as_list) t_start = perf_counter() -compute_from_arr(arr) -t_first = perf_counter() - t_start -for _ in range(round(1 / t_first)): +while perf_counter() - t_start < 1.0: compute_from_arr(arr) + +def median(sequence): + tmp = sorted(sequence) + return tmp[len(tmp) // 2] + + +t0 = perf_counter() times = [] -for _ in range(nb_runs): +while perf_counter() - t0 < 2.0: data_as_list = [random() for _ in range(size)] arr = array(data_as_list) t_start = perf_counter() compute_from_arr(arr) times.append(perf_counter() - t_start) -time = np.median(times) +time = median(times) print(f"{method:30s}: {time:.2e} s ({time / norm:5.1f} * Julia)") diff --git a/bench/microbench_low_level/result_cort.md b/bench/microbench_low_level/result_cort.md index a1b5a22..4b8ff6c 100644 --- a/bench/microbench_low_level/result_cort.md +++ b/bench/microbench_low_level/result_cort.md @@ -29,10 +29,10 @@ hostname: voyage {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} list : 4.29e-05 s ( 1.8 * Julia) -purepy : 3.89e-05 s ( 1.6 * Julia) -numpy : 4.72e-02 s (1957.9 * Julia) -_piconumpy_hpy : 1.36e-03 s ( 56.5 * Julia) -_piconumpy_cpython_capi : 8.03e-03 s (332.8 * Julia) +purepy : 4.12e-05 s ( 1.7 * Julia) +numpy : 4.77e-02 s (1975.5 * Julia) +_piconumpy_hpy : 1.46e-03 s ( 60.5 * Julia) +_piconumpy_cpython_capi : 6.96e-03 s (288.5 * Julia) ``` With CPython: @@ -42,9 +42,23 @@ bench cort hostname: voyage {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} -list : 4.47e-03 s (185.4 * Julia) -purepy : 1.08e-02 s (448.6 * Julia) -numpy : 9.69e-03 s (401.6 * Julia) -_piconumpy_hpy : 5.32e-03 s (220.5 * Julia) -_piconumpy_cpython_capi : 4.80e-03 s (198.9 * Julia) +list : 4.42e-03 s (183.4 * Julia) +purepy : 1.04e-02 s (430.0 * Julia) +numpy 
: 9.76e-03 s (404.4 * Julia) +_piconumpy_hpy : 5.66e-03 s (234.7 * Julia) +_piconumpy_cpython_capi : 4.77e-03 s (197.7 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench cort +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 2.44e-05 s ( 1.0 * Julia) +purepy : 3.13e-05 s ( 1.3 * Julia) +numpy : ImportError numpy +_piconumpy_hpy : 1.69e-04 s ( 7.0 * Julia) +_piconumpy_cpython_capi : 3.55e-04 s ( 14.7 * Julia) ``` diff --git a/bench/microbench_low_level/result_init_zeros.md b/bench/microbench_low_level/result_init_zeros.md index 1228d63..68eee34 100644 --- a/bench/microbench_low_level/result_init_zeros.md +++ b/bench/microbench_low_level/result_init_zeros.md @@ -17,11 +17,11 @@ bench init_zeros hostname: voyage {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} -list : 2.53e-05 s ( 5.2 * Julia) -purepy : 2.65e-05 s ( 5.4 * Julia) -numpy : 9.01e-03 s (1848.7 * Julia) -_piconumpy_hpy : 4.17e-04 s ( 85.6 * Julia) -_piconumpy_cpython_capi : 1.10e-03 s (224.9 * Julia) +list : 2.63e-05 s ( 5.4 * Julia) +purepy : 2.99e-05 s ( 6.1 * Julia) +numpy : 1.17e-02 s (2403.5 * Julia) +_piconumpy_hpy : 4.58e-04 s ( 94.1 * Julia) +_piconumpy_cpython_capi : 8.46e-04 s (173.6 * Julia) ``` With CPython: @@ -31,9 +31,23 @@ bench init_zeros hostname: voyage {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} -list : 5.18e-04 s (106.3 * Julia) -purepy : 1.94e-03 s (397.8 * Julia) +list : 5.34e-04 s (109.6 * Julia) +purepy : 2.03e-03 s (417.4 * Julia) numpy : 1.17e-03 s (239.3 * Julia) -_piconumpy_hpy : 6.46e-04 s (132.5 * Julia) -_piconumpy_cpython_capi : 5.43e-04 s (111.4 * Julia) +_piconumpy_hpy : 7.51e-04 s (154.1 * Julia) +_piconumpy_cpython_capi : 5.44e-04 s (111.5 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` 
+bench init_zeros +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 1.37e-05 s ( 2.8 * Julia) +purepy : 1.93e-05 s ( 4.0 * Julia) +numpy : ImportError numpy +_piconumpy_hpy : 4.68e-05 s ( 9.6 * Julia) +_piconumpy_cpython_capi : 1.74e-04 s ( 35.8 * Julia) ``` diff --git a/bench/microbench_low_level/result_sum_loop.md b/bench/microbench_low_level/result_sum_loop.md index 556506c..f2d0a86 100644 --- a/bench/microbench_low_level/result_sum_loop.md +++ b/bench/microbench_low_level/result_sum_loop.md @@ -19,11 +19,11 @@ bench sum_loop hostname: voyage {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} -list : 2.34e-05 s ( 1.7 * Julia) -purepy : 2.41e-05 s ( 1.8 * Julia) -numpy : 8.91e-03 s (654.4 * Julia) -_piconumpy_hpy : 3.37e-04 s ( 24.8 * Julia) -_piconumpy_cpython_capi : 2.04e-03 s (150.1 * Julia) +list : 2.35e-05 s ( 1.8 * Julia) +purepy : 2.60e-05 s ( 2.0 * Julia) +numpy : 8.97e-03 s (677.0 * Julia) +_piconumpy_hpy : 3.73e-04 s ( 28.2 * Julia) +_piconumpy_cpython_capi : 1.75e-03 s (132.4 * Julia) ``` With CPython: @@ -33,9 +33,24 @@ bench sum_loop hostname: voyage {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} -list : 3.59e-04 s ( 26.4 * Julia) -purepy : 2.18e-03 s (160.3 * Julia) -numpy : 1.12e-03 s ( 82.5 * Julia) -_piconumpy_hpy : 6.30e-04 s ( 46.2 * Julia) -_piconumpy_cpython_capi : 5.04e-04 s ( 37.0 * Julia) +list : 3.65e-04 s ( 27.5 * Julia) +purepy : 2.17e-03 s (164.1 * Julia) +numpy : 1.09e-03 s ( 82.2 * Julia) +_piconumpy_hpy : 7.39e-04 s ( 55.8 * Julia) +_piconumpy_cpython_capi : 5.07e-04 s ( 38.3 * Julia) + +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench sum_loop +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} 
+list : 1.92e-05 s ( 1.4 * Julia) +purepy : 3.61e-05 s ( 2.7 * Julia) +numpy : ImportError numpy +_piconumpy_hpy : 5.03e-04 s ( 38.0 * Julia) +_piconumpy_cpython_capi : 2.90e-03 s (219.1 * Julia) ``` diff --git a/bench/microbench_low_level/result_sum_loop_index.md b/bench/microbench_low_level/result_sum_loop_index.md index 65a11c2..4a56d2b 100644 --- a/bench/microbench_low_level/result_sum_loop_index.md +++ b/bench/microbench_low_level/result_sum_loop_index.md @@ -20,10 +20,10 @@ hostname: voyage {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} list : 1.19e-05 s ( 2.0 * Julia) -purepy : 1.20e-05 s ( 2.1 * Julia) -numpy : 4.07e-03 s (692.9 * Julia) -_piconumpy_hpy : 1.65e-04 s ( 28.2 * Julia) -_piconumpy_cpython_capi : 9.95e-04 s (169.5 * Julia) +purepy : 1.64e-05 s ( 2.8 * Julia) +numpy : 4.18e-03 s (711.4 * Julia) +_piconumpy_hpy : 1.73e-04 s ( 29.4 * Julia) +_piconumpy_cpython_capi : 8.44e-04 s (143.8 * Julia) ``` With CPython: @@ -33,9 +33,23 @@ bench sum_loop_index hostname: voyage {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} -list : 3.91e-04 s ( 66.6 * Julia) -purepy : 1.10e-03 s (186.9 * Julia) -numpy : 8.95e-04 s (152.5 * Julia) -_piconumpy_hpy : 4.85e-04 s ( 82.5 * Julia) -_piconumpy_cpython_capi : 4.15e-04 s ( 70.6 * Julia) +list : 3.91e-04 s ( 66.5 * Julia) +purepy : 1.11e-03 s (188.3 * Julia) +numpy : 8.93e-04 s (152.1 * Julia) +_piconumpy_hpy : 5.42e-04 s ( 92.3 * Julia) +_piconumpy_cpython_capi : 4.17e-04 s ( 71.0 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench sum_loop_index +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 1.36e-05 s ( 2.3 * Julia) +purepy : 1.81e-05 s ( 3.1 * Julia) +numpy : ImportError numpy +_piconumpy_hpy : 3.68e-05 s ( 6.3 * Julia) +_piconumpy_cpython_capi : 1.08e-04 
s ( 18.5 * Julia) ``` From 2a8cbf2160f3ac0b93eb9648dc7751f8a29dd5b6 Mon Sep 17 00:00:00 2001 From: paugier Date: Thu, 9 Dec 2021 16:58:51 +0100 Subject: [PATCH 17/32] Cleanup after rebase --- bench/microbench_low_level/Makefile | 2 +- bench/microbench_low_level/README.md | 180 ------------------ bench/microbench_low_level/bench.py | 1 + bench/microbench_low_level/result_sum_loop.md | 146 ++++++++++++++ 4 files changed, 148 insertions(+), 181 deletions(-) diff --git a/bench/microbench_low_level/Makefile b/bench/microbench_low_level/Makefile index e9f350e..256dc90 100644 --- a/bench/microbench_low_level/Makefile +++ b/bench/microbench_low_level/Makefile @@ -38,7 +38,7 @@ tmp_julia_init_zeros.txt: bench_init_zeros.jl clean: rm -f tmp_*.txt -produce_traces: tmp_result_julia.txt +produce_traces: tmp_julia_sum_loop.txt PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_list.txt pypy bench.py list PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_piconumpy_list.txt pypy bench.py purepy PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_piconumpy_hpy.txt pypy bench.py _piconumpy_hpy diff --git a/bench/microbench_low_level/README.md b/bench/microbench_low_level/README.md index 0f1e32f..b8778f2 100644 --- a/bench/microbench_low_level/README.md +++ b/bench/microbench_low_level/README.md @@ -13,183 +13,3 @@ We measure the performance for functions containing low level Python code. between derivatives The files result_*.txt contain few results. - -We measure the performance for functions containing low level Python code. - -```python -def sum_loop(arr): - result = 0.0 - for value in arr: - result += value - return result -``` - -One can run the benchmarks with `make`. 
- -With PyPy3.7, I get: - -``` -{'cache_tag': 'pypy37', - 'version': sys.pypy_version_info(major=7, minor=3, micro=6, releaselevel='final', serial=0)} -list : 1.34e-05 s ( 1.3 * Julia) -piconumpy.purepy : 1.33e-05 s ( 1.3 * Julia) -numpy : 4.00e-03 s (376.6 * Julia) -_piconumpy_hpy : 1.99e-04 s ( 18.8 * Julia) -_piconumpy_cpython_capi : 1.27e-03 s (119.5 * Julia) -``` - -With CPython: - -``` -{'cache_tag': 'cpython-39', - 'version': sys.version_info(major=3, minor=9, micro=6, releaselevel='final', serial=0)} -list : 2.62e-04 s ( 24.6 * Julia) -piconumpy.purepy : 1.25e-03 s (117.5 * Julia) -numpy : 7.35e-04 s ( 69.2 * Julia) -_piconumpy_hpy : 4.26e-04 s ( 40.2 * Julia) -_piconumpy_cpython_capi : 3.52e-04 s ( 33.1 * Julia) -``` - -- PyPy is fast with list (1.3 * Julia, same order of magnitude that with Julia) -and as fast for a piconumpy array based on a list ("piconumpy.purepy", zero -cost abstraction!) - -- Numpy and _piconumpy_cpython_capi are both much slower with PyPy than with -Cpython. We can guess that the Numpy port to HPy would fix that. - -- piconumpy_hpy is a bit faster with PyPy (19 * Julia) than with CPython (40 * -Julia), however, we see that PyPy does not strongly accelerate piconumpy_hpy -(19 * Julia, 14 * piconumpy_list). 
- -## Traces PyPy `sum_loop` - -### List - -``` -+557: label(p0, p1, p6, p9, f35, f30, p15, p22, p26, i32, i27, p29, descr=TargetToken(140447503809120)) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') -+606: i44 = uint_ge(i32, i27) -guard_false(i44, descr=) [p0, p6, p9, p15, p1, i32, i27, i44, p26, f30, f35] -+615: f45 = getarrayitem_gc_f(p29, i32, descr=) -+622: i47 = int_add(i32, 1) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') -+626: f48 = float_add(f35, f45) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') -+630: setfield_gc(p15, i47, descr=) -+634: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, f45, f48, None, None] -+634: i51 = getfield_raw_i(140447672379264, descr=) -+647: i53 = int_sub(i51, 1) -+651: setfield_raw(140447672379264, i53, descr=) -+654: i56 = int_lt(i53, 0) -+658: guard_false(i56, descr=) [p0, p6, p9, p15, p1, i53, f45, f48, None, None] -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') -+664: i57 = arraylen_gc(p29, descr=) -+664: jump(p0, p1, p6, p9, f48, f45, p15, p22, p26, i47, i27, p29, descr=TargetToken(140447503809120)) -``` - -### piconumpy purepy (based on list) - -``` -+705: label(p0, p1, p6, p9, f53, f46, p15, p22, i49, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776)) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') -+760: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53] -+760: p62 = force_token() -+760: enter_portal_frame(21, 28364) -debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#0 LOAD_FAST') -debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#2 LOAD_ATTR') 
-debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#4 LOAD_FAST') -debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#6 BINARY_SUBSCR') -+760: i65 = uint_ge(i49, i43) -+763: guard_false(i65, descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53] -+769: f66 = getarrayitem_gc_f(p45, i49, descr=) -debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#8 RETURN_VALUE') -+776: leave_portal_frame(21) -+776: i69 = int_add(i49, 1) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') -+780: f70 = float_add(f53, f66) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') -+784: i72 = getfield_raw_i(139748871243648, descr=) -+797: i74 = int_sub(i72, 3) -+801: setfield_raw(139748871243648, i74, descr=) -+804: setfield_gc(p15, i69, descr=) -+808: i77 = int_lt(i74, 0) -+812: guard_false(i77, descr=) [p0, p6, p9, p15, p1, i74, f66, f70, None, None, None] -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') -+818: i78 = arraylen_gc(p45, descr=) -+818: jump(p0, p1, p6, p9, f70, f66, p15, p22, i69, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776)) -``` - -### piconumpy hpy - -``` -+1339: label(p0, p1, p6, p9, f73, p63, p15, i68, p62, descr=TargetToken(139865876151520)) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') -+1352: p82 = getfield_gc_r(p15, descr=) -+1356: guard_nonnull_class(p82, 139866025815200, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] -+1376: p84 = getfield_gc_r(p82, descr=) -+1387: guard_value(p84, ConstPtr(ptr85), descr=) [p0, p6, p9, p63, p15, p1, p82, f73] -+1396: guard_not_invalidated(descr=) [p0, p6, p9, p63, p15, p1, p82, 
f73] -+1403: p87 = getfield_gc_r(ConstPtr(ptr86), descr=) -+1414: guard_value(p87, ConstPtr(ptr88), descr=) [p0, p6, p9, p63, p15, p1, p82, f73] -+1423: i90 = getfield_gc_i(ConstPtr(ptr89), descr=) -+1427: i92 = int_lt(i68, 0) -+1431: guard_false(i92, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] -+1444: i94 = getfield_gc_i(ConstPtr(ptr93), descr=) -+1448: i95 = int_is_zero(i94) -+1451: guard_false(i95, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] -+1457: i97 = int_sub(i94, 1) -+1461: p99 = getfield_gc_r(ConstPtr(ptr98), descr=) -+1465: i100 = getarrayitem_gc_i(p99, i97, descr=) -+1470: i101 = arraylen_gc(p99, descr=) -+1474: i103 = int_rshift(i101, 1) -+1477: i105 = int_sub(i103, 5) -+1481: i106 = int_lt(i97, i105) -+1484: cond_call(i106, ConstClass(_ll_list_resize_hint_really_look_inside_iff__listPtr_Signed_Bool), ConstPtr(ptr108), i97, 0, descr=) -+1490: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, i97, f73] -+1490: setfield_gc(ConstPtr(ptr110), i97, descr=) -+1494: i112 = int_lt(i100, 0) -+1498: guard_false(i112, descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, f73] -+1522: setarrayitem_gc(p62, i100, p82, descr=) -+1527: p113 = force_token() -+1548: setfield_gc(p0, p113, descr=) -+1552: i115 = call_may_force_i(i90, 139866044538144, i100, i68, descr=) -+1663: guard_not_forced(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73] -+1674: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73] -+1688: call_n(ConstClass(close), i100, descr=) -+1754: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73] -+1768: i117 = int_is_true(i115) -+1771: guard_true(i117, descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73] -+1784: p119 = getfield_gc_r(ConstPtr(ptr118), descr=) -+1788: p120 = getarrayitem_gc_r(p119, i115, descr=) -+1793: call_n(ConstClass(close), i115, descr=) -+1866: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73] -+1880: 
guard_nonnull_class(p120, ConstClass(W_FloatObject), descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73] -+1907: i123 = getfield_gc_i(p15, descr=) -+1918: i125 = int_add(i123, 1) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') -+1923: setfield_gc(p15, i125, descr=) -+1927: f126 = getfield_gc_f(p120, descr=) -+1933: f127 = float_add(f73, f126) -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') -debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') -+1947: guard_not_invalidated(descr=) [p0, p6, p9, p120, p15, p1, f127, None, None, None] -+1947: i129 = getfield_raw_i(139866044675968, descr=) -+1960: i131 = int_sub(i129, 3) -+1964: setfield_raw(139866044675968, i131, descr=) -+1967: i134 = int_lt(i131, 0) -+1971: guard_false(i134, descr=) [p0, p6, p9, p120, p15, p1, i131, f127, None, None, None] -debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') -+1977: i135 = arraylen_gc(p119, descr=) -+1977: jump(p0, p1, p6, p9, f127, p120, p15, i125, p119, descr=TargetToken(139865876151520)) -``` diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index 8524fd5..305c917 100644 --- a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -100,6 +100,7 @@ def median(sequence): return tmp[len(tmp) // 2] +# measure during ~ 2s t0 = perf_counter() times = [] while perf_counter() - t0 < 2.0: diff --git a/bench/microbench_low_level/result_sum_loop.md b/bench/microbench_low_level/result_sum_loop.md index f2d0a86..48e32f2 100644 --- a/bench/microbench_low_level/result_sum_loop.md +++ b/bench/microbench_low_level/result_sum_loop.md @@ -54,3 +54,149 @@ numpy : ImportError numpy _piconumpy_hpy : 5.03e-04 s ( 38.0 * Julia) _piconumpy_cpython_capi : 2.90e-03 s (219.1 * 
Julia) ``` + +## Summary + +- PyPy is fast with list (1.3 * Julia, same order of magnitude that with Julia) +and as fast for a piconumpy array based on a list ("piconumpy.purepy", zero +cost abstraction!) + +- Numpy and _piconumpy_cpython_capi are both much slower with PyPy than with +Cpython. We can guess that the Numpy port to HPy would fix that. + +- piconumpy_hpy is a bit faster with PyPy (19 * Julia) than with CPython (40 * +Julia), however, we see that PyPy does not strongly accelerate piconumpy_hpy +(19 * Julia, 14 * piconumpy_list). + +## Traces PyPy `sum_loop` + +### List + +``` ++557: label(p0, p1, p6, p9, f35, f30, p15, p22, p26, i32, i27, p29, descr=TargetToken(140447503809120)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++606: i44 = uint_ge(i32, i27) +guard_false(i44, descr=) [p0, p6, p9, p15, p1, i32, i27, i44, p26, f30, f35] ++615: f45 = getarrayitem_gc_f(p29, i32, descr=) ++622: i47 = int_add(i32, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++626: f48 = float_add(f35, f45) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++630: setfield_gc(p15, i47, descr=) ++634: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, f45, f48, None, None] ++634: i51 = getfield_raw_i(140447672379264, descr=) ++647: i53 = int_sub(i51, 1) ++651: setfield_raw(140447672379264, i53, descr=) ++654: i56 = int_lt(i53, 0) ++658: guard_false(i56, descr=) [p0, p6, p9, p15, p1, i53, f45, f48, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++664: i57 = arraylen_gc(p29, descr=) ++664: jump(p0, p1, p6, p9, f48, f45, p15, p22, p26, i47, i27, p29, descr=TargetToken(140447503809120)) +``` + +### piconumpy purepy (based on list) 
+ +``` ++705: label(p0, p1, p6, p9, f53, f46, p15, p22, i49, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++760: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53] ++760: p62 = force_token() ++760: enter_portal_frame(21, 28364) +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#0 LOAD_FAST') +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#2 LOAD_ATTR') +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#4 LOAD_FAST') +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#6 BINARY_SUBSCR') ++760: i65 = uint_ge(i49, i43) ++763: guard_false(i65, descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53] ++769: f66 = getarrayitem_gc_f(p45, i49, descr=) +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#8 RETURN_VALUE') ++776: leave_portal_frame(21) ++776: i69 = int_add(i49, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++780: f70 = float_add(f53, f66) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++784: i72 = getfield_raw_i(139748871243648, descr=) ++797: i74 = int_sub(i72, 3) ++801: setfield_raw(139748871243648, i74, descr=) ++804: setfield_gc(p15, i69, descr=) ++808: i77 = int_lt(i74, 0) ++812: guard_false(i77, descr=) [p0, p6, p9, p15, p1, i74, f66, f70, None, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++818: i78 = arraylen_gc(p45, descr=) ++818: jump(p0, p1, p6, p9, f70, f66, p15, p22, i69, p29, p38, p42, i43, p45, 
descr=TargetToken(139748702723776)) +``` + +### piconumpy hpy + +``` ++1339: label(p0, p1, p6, p9, f73, p63, p15, i68, p62, descr=TargetToken(139865876151520)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++1352: p82 = getfield_gc_r(p15, descr=) ++1356: guard_nonnull_class(p82, 139866025815200, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1376: p84 = getfield_gc_r(p82, descr=) ++1387: guard_value(p84, ConstPtr(ptr85), descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1396: guard_not_invalidated(descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1403: p87 = getfield_gc_r(ConstPtr(ptr86), descr=) ++1414: guard_value(p87, ConstPtr(ptr88), descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1423: i90 = getfield_gc_i(ConstPtr(ptr89), descr=) ++1427: i92 = int_lt(i68, 0) ++1431: guard_false(i92, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1444: i94 = getfield_gc_i(ConstPtr(ptr93), descr=) ++1448: i95 = int_is_zero(i94) ++1451: guard_false(i95, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1457: i97 = int_sub(i94, 1) ++1461: p99 = getfield_gc_r(ConstPtr(ptr98), descr=) ++1465: i100 = getarrayitem_gc_i(p99, i97, descr=) ++1470: i101 = arraylen_gc(p99, descr=) ++1474: i103 = int_rshift(i101, 1) ++1477: i105 = int_sub(i103, 5) ++1481: i106 = int_lt(i97, i105) ++1484: cond_call(i106, ConstClass(_ll_list_resize_hint_really_look_inside_iff__listPtr_Signed_Bool), ConstPtr(ptr108), i97, 0, descr=) ++1490: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, i97, f73] ++1490: setfield_gc(ConstPtr(ptr110), i97, descr=) ++1494: i112 = int_lt(i100, 0) ++1498: guard_false(i112, descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, f73] ++1522: setarrayitem_gc(p62, i100, p82, descr=) ++1527: p113 = force_token() ++1548: setfield_gc(p0, p113, descr=) ++1552: i115 = call_may_force_i(i90, 139866044538144, i100, i68, descr=) ++1663: guard_not_forced(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73] ++1674: guard_no_exception(descr=) [p0, 
p6, p9, p63, p15, p1, i100, i115, i68, p82, f73] ++1688: call_n(ConstClass(close), i100, descr=) ++1754: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73] ++1768: i117 = int_is_true(i115) ++1771: guard_true(i117, descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73] ++1784: p119 = getfield_gc_r(ConstPtr(ptr118), descr=) ++1788: p120 = getarrayitem_gc_r(p119, i115, descr=) ++1793: call_n(ConstClass(close), i115, descr=) ++1866: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73] ++1880: guard_nonnull_class(p120, ConstClass(W_FloatObject), descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73] ++1907: i123 = getfield_gc_i(p15, descr=) ++1918: i125 = int_add(i123, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++1923: setfield_gc(p15, i125, descr=) ++1927: f126 = getfield_gc_f(p120, descr=) ++1933: f127 = float_add(f73, f126) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++1947: guard_not_invalidated(descr=) [p0, p6, p9, p120, p15, p1, f127, None, None, None] ++1947: i129 = getfield_raw_i(139866044675968, descr=) ++1960: i131 = int_sub(i129, 3) ++1964: setfield_raw(139866044675968, i131, descr=) ++1967: i134 = int_lt(i131, 0) ++1971: guard_false(i134, descr=) [p0, p6, p9, p120, p15, p1, i131, f127, None, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++1977: i135 = arraylen_gc(p119, descr=) ++1977: jump(p0, p1, p6, p9, f127, p120, p15, i125, p119, descr=TargetToken(139865876151520)) +``` From 3e379acf854cc1a5f0355de74aea4934f934f05b Mon Sep 17 00:00:00 2001 From: paugier Date: Fri, 10 Dec 2021 10:48:40 +0100 Subject: [PATCH 18/32] Cosmetic changes output microbench --- 
bench/microbench_low_level/bench.py | 3 +++ bench/microbench_low_level/result_cort.md | 18 +++++++++--------- .../microbench_low_level/result_init_zeros.md | 18 +++++++++--------- bench/microbench_low_level/result_sum_loop.md | 18 +++++++++--------- .../result_sum_loop_index.md | 18 +++++++++--------- 5 files changed, 39 insertions(+), 36 deletions(-) diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index 305c917..5ab4a21 100644 --- a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -85,6 +85,9 @@ def cort(arr): if "piconumpy" not in method: method = f"piconumpy.{method}" +if "_piconumpy_" in method: + method = method.replace("_piconumpy_", "piconumpy.") + size = 10000 # warming during ~ 1s diff --git a/bench/microbench_low_level/result_cort.md b/bench/microbench_low_level/result_cort.md index 4b8ff6c..b5578bf 100644 --- a/bench/microbench_low_level/result_cort.md +++ b/bench/microbench_low_level/result_cort.md @@ -29,10 +29,10 @@ hostname: voyage {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} list : 4.29e-05 s ( 1.8 * Julia) -purepy : 4.12e-05 s ( 1.7 * Julia) +piconumpy.purepy : 4.12e-05 s ( 1.7 * Julia) numpy : 4.77e-02 s (1975.5 * Julia) -_piconumpy_hpy : 1.46e-03 s ( 60.5 * Julia) -_piconumpy_cpython_capi : 6.96e-03 s (288.5 * Julia) +piconumpy.hpy : 1.46e-03 s ( 60.5 * Julia) +piconumpy.cpython_capi : 6.96e-03 s (288.5 * Julia) ``` With CPython: @@ -43,10 +43,10 @@ hostname: voyage {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} list : 4.42e-03 s (183.4 * Julia) -purepy : 1.04e-02 s (430.0 * Julia) +piconumpy.purepy : 1.04e-02 s (430.0 * Julia) numpy : 9.76e-03 s (404.4 * Julia) -_piconumpy_hpy : 5.66e-03 s (234.7 * Julia) -_piconumpy_cpython_capi : 4.77e-03 s (197.7 * Julia) +piconumpy.hpy : 5.66e-03 s (234.7 * Julia) +piconumpy.cpython_capi : 4.77e-03 s 
(197.7 * Julia) ``` With Python 3.8.5 (GraalVM CE Native 21.3.0) @@ -57,8 +57,8 @@ hostname: voyage {'cache_tag': 'graalpython-38', 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} list : 2.44e-05 s ( 1.0 * Julia) -purepy : 3.13e-05 s ( 1.3 * Julia) +piconumpy.purepy : 3.13e-05 s ( 1.3 * Julia) numpy : ImportError numpy -_piconumpy_hpy : 1.69e-04 s ( 7.0 * Julia) -_piconumpy_cpython_capi : 3.55e-04 s ( 14.7 * Julia) +piconumpy.hpy : 1.69e-04 s ( 7.0 * Julia) +piconumpy.cpython_capi : 3.55e-04 s ( 14.7 * Julia) ``` diff --git a/bench/microbench_low_level/result_init_zeros.md b/bench/microbench_low_level/result_init_zeros.md index 68eee34..b88e4bd 100644 --- a/bench/microbench_low_level/result_init_zeros.md +++ b/bench/microbench_low_level/result_init_zeros.md @@ -18,10 +18,10 @@ hostname: voyage {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} list : 2.63e-05 s ( 5.4 * Julia) -purepy : 2.99e-05 s ( 6.1 * Julia) +piconumpy.purepy : 2.99e-05 s ( 6.1 * Julia) numpy : 1.17e-02 s (2403.5 * Julia) -_piconumpy_hpy : 4.58e-04 s ( 94.1 * Julia) -_piconumpy_cpython_capi : 8.46e-04 s (173.6 * Julia) +piconumpy.hpy : 4.58e-04 s ( 94.1 * Julia) +piconumpy.cpython_capi : 8.46e-04 s (173.6 * Julia) ``` With CPython: @@ -32,10 +32,10 @@ hostname: voyage {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} list : 5.34e-04 s (109.6 * Julia) -purepy : 2.03e-03 s (417.4 * Julia) +piconumpy.purepy : 2.03e-03 s (417.4 * Julia) numpy : 1.17e-03 s (239.3 * Julia) -_piconumpy_hpy : 7.51e-04 s (154.1 * Julia) -_piconumpy_cpython_capi : 5.44e-04 s (111.5 * Julia) +piconumpy.hpy : 7.51e-04 s (154.1 * Julia) +piconumpy.cpython_capi : 5.44e-04 s (111.5 * Julia) ``` With Python 3.8.5 (GraalVM CE Native 21.3.0) @@ -46,8 +46,8 @@ hostname: voyage {'cache_tag': 'graalpython-38', 'version': sys.version_info(major=3, minor=8, micro=5, 
releaselevel='alpha', serial=0)} list : 1.37e-05 s ( 2.8 * Julia) -purepy : 1.93e-05 s ( 4.0 * Julia) +piconumpy.purepy : 1.93e-05 s ( 4.0 * Julia) numpy : ImportError numpy -_piconumpy_hpy : 4.68e-05 s ( 9.6 * Julia) -_piconumpy_cpython_capi : 1.74e-04 s ( 35.8 * Julia) +piconumpy.hpy : 4.68e-05 s ( 9.6 * Julia) +piconumpy.cpython_capi : 1.74e-04 s ( 35.8 * Julia) ``` diff --git a/bench/microbench_low_level/result_sum_loop.md b/bench/microbench_low_level/result_sum_loop.md index 48e32f2..b415051 100644 --- a/bench/microbench_low_level/result_sum_loop.md +++ b/bench/microbench_low_level/result_sum_loop.md @@ -20,10 +20,10 @@ hostname: voyage {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} list : 2.35e-05 s ( 1.8 * Julia) -purepy : 2.60e-05 s ( 2.0 * Julia) +piconumpy.purepy : 2.60e-05 s ( 2.0 * Julia) numpy : 8.97e-03 s (677.0 * Julia) -_piconumpy_hpy : 3.73e-04 s ( 28.2 * Julia) -_piconumpy_cpython_capi : 1.75e-03 s (132.4 * Julia) +piconumpy.hpy : 3.73e-04 s ( 28.2 * Julia) +piconumpy.cpython_capi : 1.75e-03 s (132.4 * Julia) ``` With CPython: @@ -34,10 +34,10 @@ hostname: voyage {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} list : 3.65e-04 s ( 27.5 * Julia) -purepy : 2.17e-03 s (164.1 * Julia) +piconumpy.purepy : 2.17e-03 s (164.1 * Julia) numpy : 1.09e-03 s ( 82.2 * Julia) -_piconumpy_hpy : 7.39e-04 s ( 55.8 * Julia) -_piconumpy_cpython_capi : 5.07e-04 s ( 38.3 * Julia) +piconumpy.hpy : 7.39e-04 s ( 55.8 * Julia) +piconumpy.cpython_capi : 5.07e-04 s ( 38.3 * Julia) ``` @@ -49,10 +49,10 @@ hostname: voyage {'cache_tag': 'graalpython-38', 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} list : 1.92e-05 s ( 1.4 * Julia) -purepy : 3.61e-05 s ( 2.7 * Julia) +piconumpy.purepy : 3.61e-05 s ( 2.7 * Julia) numpy : ImportError numpy -_piconumpy_hpy : 5.03e-04 s ( 38.0 * Julia) 
-_piconumpy_cpython_capi : 2.90e-03 s (219.1 * Julia) +piconumpy.hpy : 5.03e-04 s ( 38.0 * Julia) +piconumpy.cpython_capi : 2.90e-03 s (219.1 * Julia) ``` ## Summary diff --git a/bench/microbench_low_level/result_sum_loop_index.md b/bench/microbench_low_level/result_sum_loop_index.md index 4a56d2b..fd63301 100644 --- a/bench/microbench_low_level/result_sum_loop_index.md +++ b/bench/microbench_low_level/result_sum_loop_index.md @@ -20,10 +20,10 @@ hostname: voyage {'cache_tag': 'pypy37', 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} list : 1.19e-05 s ( 2.0 * Julia) -purepy : 1.64e-05 s ( 2.8 * Julia) +piconumpy.purepy : 1.64e-05 s ( 2.8 * Julia) numpy : 4.18e-03 s (711.4 * Julia) -_piconumpy_hpy : 1.73e-04 s ( 29.4 * Julia) -_piconumpy_cpython_capi : 8.44e-04 s (143.8 * Julia) +piconumpy.hpy : 1.73e-04 s ( 29.4 * Julia) +piconumpy.cpython_capi : 8.44e-04 s (143.8 * Julia) ``` With CPython: @@ -34,10 +34,10 @@ hostname: voyage {'cache_tag': 'cpython-39', 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} list : 3.91e-04 s ( 66.5 * Julia) -purepy : 1.11e-03 s (188.3 * Julia) +piconumpy.purepy : 1.11e-03 s (188.3 * Julia) numpy : 8.93e-04 s (152.1 * Julia) -_piconumpy_hpy : 5.42e-04 s ( 92.3 * Julia) -_piconumpy_cpython_capi : 4.17e-04 s ( 71.0 * Julia) +piconumpy.hpy : 5.42e-04 s ( 92.3 * Julia) +piconumpy.cpython_capi : 4.17e-04 s ( 71.0 * Julia) ``` With Python 3.8.5 (GraalVM CE Native 21.3.0) @@ -48,8 +48,8 @@ hostname: voyage {'cache_tag': 'graalpython-38', 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} list : 1.36e-05 s ( 2.3 * Julia) -purepy : 1.81e-05 s ( 3.1 * Julia) +piconumpy.purepy : 1.81e-05 s ( 3.1 * Julia) numpy : ImportError numpy -_piconumpy_hpy : 3.68e-05 s ( 6.3 * Julia) -_piconumpy_cpython_capi : 1.08e-04 s ( 18.5 * Julia) +piconumpy.hpy : 3.68e-05 s ( 6.3 * Julia) +piconumpy.cpython_capi : 1.08e-04 s ( 18.5 * Julia) ``` From 
9d7f176a8d9e9c0f969c9506bd192b055529eb22 Mon Sep 17 00:00:00 2001 From: paugier Date: Fri, 10 Dec 2021 14:36:37 +0100 Subject: [PATCH 19/32] Add "board" benchmark --- bench/microbench_low_level/Makefile | 7 ++ bench/microbench_low_level/README.md | 3 + bench/microbench_low_level/bench.py | 76 ++++++++++++------ bench/microbench_low_level/bench_board.jl | 44 +++++++++++ bench/microbench_low_level/result_board.md | 77 +++++++++++++++++++ bench/microbench_low_level/result_sum_loop.md | 1 - 6 files changed, 184 insertions(+), 24 deletions(-) create mode 100644 bench/microbench_low_level/bench_board.jl create mode 100644 bench/microbench_low_level/result_board.md diff --git a/bench/microbench_low_level/Makefile b/bench/microbench_low_level/Makefile index 256dc90..15c96e3 100644 --- a/bench/microbench_low_level/Makefile +++ b/bench/microbench_low_level/Makefile @@ -13,6 +13,9 @@ bench_cort: tmp_julia_cort.txt _bench bench_init_zeros: NAME_BENCH=init_zeros bench_init_zeros: tmp_julia_init_zeros.txt _bench +bench_board: NAME_BENCH=board +bench_board: tmp_julia_board.txt _bench + _bench: @echo bench $(NAME_BENCH) @python -c "from socket import gethostname as f; print('hostname:', f())" @@ -35,6 +38,10 @@ tmp_julia_cort.txt: bench_cort.jl tmp_julia_init_zeros.txt: bench_init_zeros.jl @julia bench_init_zeros.jl > tmp_julia_init_zeros.txt +tmp_julia_board.txt: bench_board.jl + @julia bench_board.jl > tmp_julia_board.txt + + clean: rm -f tmp_*.txt diff --git a/bench/microbench_low_level/README.md b/bench/microbench_low_level/README.md index b8778f2..e017263 100644 --- a/bench/microbench_low_level/README.md +++ b/bench/microbench_low_level/README.md @@ -12,4 +12,7 @@ We measure the performance for functions containing low level Python code. - `cort` (command `make bench_cort`): normalized cosine similarity measure between derivatives +- `board` (command `make bench_board`): few indexing, simple float computations + with sin/cos and instantiation of a small array. 
+ The files result_*.txt contain few results. diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index 5ab4a21..570d26a 100644 --- a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -2,7 +2,7 @@ from time import perf_counter from pathlib import Path from random import random -from math import sqrt +from math import sqrt, pi, sin, cos try: method = sys.argv[1] @@ -15,6 +15,33 @@ name_bench = "sum_loop" +if method == "_piconumpy_hpy": + from piconumpy.util_hpy import import_ext + + ext = import_ext() + array = ext.array +elif method == "list": + array = list +elif method == "numpy": + + try: + import numpy as np + except ImportError: + print(f"{method:30s}: ImportError numpy") + sys.exit(0) + + array = np.array +else: + d = {} + exec(f"from piconumpy.{method} import array", d) + array = d["array"] + if "piconumpy" not in method: + method = f"piconumpy.{method}" + +if "_piconumpy_" in method: + method = method.replace("_piconumpy_", "piconumpy.") + + tmp_result_julia = Path(f"tmp_julia_{name_bench}.txt") if tmp_result_julia.exists(): with open(tmp_result_julia) as file: @@ -59,34 +86,37 @@ def cort(arr): return _cort(arr, arr) -compute_from_arr = locals()[name_bench] +def board(X_0): + x0 = X_0[0] + y0 = X_0[1] + u0 = X_0[2] + v0 = X_0[3] + g = 9.81 + b = 0.5 + a = 0.25 + c = 0.5 + p = (2 * pi) / 10.0 + q = (2 * pi) / 4.0 -if method == "_piconumpy_hpy": - from piconumpy.util_hpy import import_ext + H_x = -a + b * p * sin(p * x0) * cos(q * y0) + H_xx = b * p ** 2 * cos(p * x0) * cos(q * y0) + H_y = b * q * cos(p * x0) * sin(q * y0) + H_yy = b * q ** 2 * cos(p * x0) * cos(q * y0) + H_xy = -b * q * p * sin(p * x0) * sin(q * y0) - ext = import_ext() - array = ext.array -elif method == "list": - array = list -elif method == "numpy": + F = (g + H_xx * u0 ** 2 + 2 * H_xy * u0 * v0 + H_yy * v0 ** 2) / ( + 1 + H_x ** 2 + H_y ** 2 + ) - try: - import numpy as np - except ImportError: - print(f"{method:30s}: 
ImportError numpy") - sys.exit(0) + dU = -F * H_x - c * u0 + dV = -F * H_y - c * v0 - array = np.array -else: - d = {} - exec(f"from piconumpy.{method} import array", d) - array = d["array"] - if "piconumpy" not in method: - method = f"piconumpy.{method}" + return array([u0, v0, dU, dV]) + + +compute_from_arr = locals()[name_bench] -if "_piconumpy_" in method: - method = method.replace("_piconumpy_", "piconumpy.") size = 10000 diff --git a/bench/microbench_low_level/bench_board.jl b/bench/microbench_low_level/bench_board.jl new file mode 100644 index 0000000..63187b1 --- /dev/null +++ b/bench/microbench_low_level/bench_board.jl @@ -0,0 +1,44 @@ +using Statistics + +function board(X_0::Array) + + x0 = copy(X_0[1]) + y0 = copy(X_0[2]) + u0 = copy(X_0[3]) + v0 = copy(X_0[4]) + + g = 9.81 + a = 0.25 + b = 0.5 + c = 0.5 + p = (2*π)/10.0 + q = (2*π)/4.0 + + H_x = -a + b*p*sin(p*x0)*cos(q*y0) + H_xx = b*p^2 * cos(p*x0)*cos(q*y0) + H_y = b*q*cos(p*x0)*sin(q*y0) + H_yy = b*q^2 * cos(p*x0)*cos(q*y0) + H_xy = -b*q*p*sin(p*x0)*sin(q*y0) + + F = (g + H_xx*u0^2 + 2*H_xy*u0*v0 + H_yy*v0^2)/(1 + H_x^2 + H_y^2) + + dU = -F*H_x - c*u0 + dV = -F*H_y - c*v0 + + return [u0, v0, dU, dV] + +end + +compute_from_arr = board + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_low_level/result_board.md b/bench/microbench_low_level/result_board.md new file mode 100644 index 0000000..30b407b --- /dev/null +++ b/bench/microbench_low_level/result_board.md @@ -0,0 +1,77 @@ +# Microbenchmark board + +We measure the performance for this function: + +```python +def board(X_0): + x0 = X_0[0] + y0 = X_0[1] + u0 = X_0[2] + v0 = X_0[3] + + g = 9.81 + b = 0.5 + a = 0.25 + c = 0.5 + p = (2 * pi) / 10.0 + q = (2 * pi) / 4.0 + + H_x = -a + b * p * sin(p * x0) * cos(q * y0) + H_xx = b * p ** 2 * cos(p * x0) * cos(q * y0) + H_y = b * q * cos(p * x0) * 
sin(q * y0) + H_yy = b * q ** 2 * cos(p * x0) * cos(q * y0) + H_xy = -b * q * p * sin(p * x0) * sin(q * y0) + + F = (g + H_xx * u0 ** 2 + 2 * H_xy * u0 * v0 + H_yy * v0 ** 2) / ( + 1 + H_x ** 2 + H_y ** 2 + ) + + dU = -F * H_x - c * u0 + dV = -F * H_y - c * v0 + + return array([u0, v0, dU, dV]) +``` + +One can run the benchmarks with `make bench_board`. + +With PyPy3.7, I get: + +``` +bench board +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 3.21e-07 s ( 0.9 * Julia) +piconumpy.purepy : 1.37e-05 s ( 36.9 * Julia) +numpy : 1.18e-04 s (316.6 * Julia) +piconumpy.hpy : 1.26e-05 s ( 33.8 * Julia) +piconumpy.cpython_capi : 5.52e-05 s (148.6 * Julia) +``` + +With CPython: + +``` +bench board +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 5.16e-06 s ( 13.9 * Julia) +piconumpy.purepy : 8.04e-06 s ( 21.6 * Julia) +numpy : 1.01e-05 s ( 27.1 * Julia) +piconumpy.hpy : 5.90e-06 s ( 15.9 * Julia) +piconumpy.cpython_capi : 5.56e-06 s ( 15.0 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench board +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 1.15e-05 s ( 30.9 * Julia) +piconumpy.purepy : 1.74e-05 s ( 46.8 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 4.91e-05 s (132.2 * Julia) +piconumpy.cpython_capi : 6.19e-05 s (166.7 * Julia) +``` diff --git a/bench/microbench_low_level/result_sum_loop.md b/bench/microbench_low_level/result_sum_loop.md index b415051..29d9b55 100644 --- a/bench/microbench_low_level/result_sum_loop.md +++ b/bench/microbench_low_level/result_sum_loop.md @@ -38,7 +38,6 @@ piconumpy.purepy : 2.17e-03 s (164.1 * Julia) numpy : 1.09e-03 s ( 82.2 * Julia) piconumpy.hpy : 7.39e-04 s ( 55.8 * Julia) piconumpy.cpython_capi : 5.07e-04 s ( 38.3 * 
Julia) - ``` With Python 3.8.5 (GraalVM CE Native 21.3.0) From 34ad4c1301415b2d4212f3e21e7f41355281765f Mon Sep 17 00:00:00 2001 From: paugier Date: Mon, 13 Dec 2021 21:16:29 +0100 Subject: [PATCH 20/32] Converting floats to floats is expensive and useless --- piconumpy/purepy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/piconumpy/purepy.py b/piconumpy/purepy.py index a84ad31..2ebd85c 100644 --- a/piconumpy/purepy.py +++ b/piconumpy/purepy.py @@ -2,7 +2,7 @@ class array: __slots__ = ["data", "size"] def __init__(self, data): - self.data = list(float(number) for number in data) + self.data = list(data) self.size = len(self.data) def __add__(self, other): @@ -35,4 +35,3 @@ def empty(size): def zeros(size): return array([0]*size) - From d172daa732d40f54962505b6965c1ad430ca349b Mon Sep 17 00:00:00 2001 From: paugier Date: Mon, 13 Dec 2021 21:23:01 +0100 Subject: [PATCH 21/32] Log and stability microbench --- bench/microbench_low_level/bench.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index 570d26a..6e71ba5 100644 --- a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -120,6 +120,8 @@ def board(X_0): size = 10000 +print(f"{method:30s}:", end="", flush=True) + # warming during ~ 1s data_as_list = [random() for _ in range(size)] arr = array(data_as_list) @@ -133,10 +135,10 @@ def median(sequence): return tmp[len(tmp) // 2] -# measure during ~ 2s +# measure during ~ 4s t0 = perf_counter() times = [] -while perf_counter() - t0 < 2.0: +while perf_counter() - t0 < 4.0: data_as_list = [random() for _ in range(size)] arr = array(data_as_list) t_start = perf_counter() @@ -144,4 +146,4 @@ def median(sequence): times.append(perf_counter() - t_start) time = median(times) -print(f"{method:30s}: {time:.2e} s ({time / norm:5.1f} * Julia)") +print(f" {time:.2e} s ({time / norm:5.1f} * Julia)") From 
8472fbdeb23d0f0d70d3d8bd4025461d0344b5ad Mon Sep 17 00:00:00 2001 From: paugier Date: Wed, 15 Dec 2021 12:06:15 +0100 Subject: [PATCH 22/32] Add microbench instantiate --- .gitignore | 5 +- bench/microbench_low_level/Makefile | 57 +++++++++---------- bench/microbench_low_level/README.md | 3 + bench/microbench_low_level/bench.py | 13 ++++- .../{ => julia}/bench_board.jl | 0 .../{ => julia}/bench_cort.jl | 0 .../{ => julia}/bench_init_zeros.jl | 0 .../julia/bench_instantiate.jl | 22 +++++++ .../{ => julia}/bench_sum_loop.jl | 0 .../{ => julia}/bench_sum_loop_index.jl | 0 .../microbench_low_level/result_initialize.md | 55 ++++++++++++++++++ 11 files changed, 121 insertions(+), 34 deletions(-) rename bench/microbench_low_level/{ => julia}/bench_board.jl (100%) rename bench/microbench_low_level/{ => julia}/bench_cort.jl (100%) rename bench/microbench_low_level/{ => julia}/bench_init_zeros.jl (100%) create mode 100644 bench/microbench_low_level/julia/bench_instantiate.jl rename bench/microbench_low_level/{ => julia}/bench_sum_loop.jl (100%) rename bench/microbench_low_level/{ => julia}/bench_sum_loop_index.jl (100%) create mode 100644 bench/microbench_low_level/result_initialize.md diff --git a/.gitignore b/.gitignore index 9a709bb..8e8015b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,5 +9,8 @@ build **/tmp*.* **/tmp*.* +**/tmp/* -*_cython.c \ No newline at end of file +*_cython.c + +piconumpy/_piconumpy_hpy.py \ No newline at end of file diff --git a/bench/microbench_low_level/Makefile b/bench/microbench_low_level/Makefile index 15c96e3..1734cbe 100644 --- a/bench/microbench_low_level/Makefile +++ b/bench/microbench_low_level/Makefile @@ -1,51 +1,46 @@ -.PHONY : clean bench_sum_loop bench_sum_loop_index bench_cort bench_init_zeros +IMPLEMENTATION=$(shell python -c 'import sys; print(sys.implementation.cache_tag)') + +.PHONY : clean bench_sum_loop bench_sum_loop_index bench_cort bench_init_zeros bench_instantiate bench_sum_loop: NAME_BENCH=sum_loop -bench_sum_loop: 
tmp_julia_sum_loop.txt _bench +bench_sum_loop: tmp/sum_loop_julia.txt _bench bench_sum_loop_index: NAME_BENCH=sum_loop_index -bench_sum_loop_index: tmp_julia_sum_loop_index.txt _bench +bench_sum_loop_index: tmp/sum_loop_index_julia.txt _bench bench_cort: NAME_BENCH=cort -bench_cort: tmp_julia_cort.txt _bench +bench_cort: tmp/cort_julia.txt _bench bench_init_zeros: NAME_BENCH=init_zeros -bench_init_zeros: tmp_julia_init_zeros.txt _bench +bench_init_zeros: tmp/init_zeros_julia.txt _bench bench_board: NAME_BENCH=board -bench_board: tmp_julia_board.txt _bench +bench_board: tmp/board_julia.txt _bench + +bench_instantiate: NAME_BENCH=instantiate +bench_instantiate: tmp/instantiate_julia.txt _bench + _bench: @echo bench $(NAME_BENCH) @python -c "from socket import gethostname as f; print('hostname:', f())" @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" - @python bench.py list $(NAME_BENCH) - @python bench.py purepy $(NAME_BENCH) - @python bench.py numpy $(NAME_BENCH) - @python bench.py _piconumpy_hpy $(NAME_BENCH) - @python bench.py _piconumpy_cpython_capi $(NAME_BENCH) - -tmp_julia_sum_loop.txt: bench_sum_loop.jl - @julia bench_sum_loop.jl > tmp_julia_sum_loop.txt - -tmp_julia_sum_loop_index.txt: bench_sum_loop_index.jl - @julia bench_sum_loop_index.jl > tmp_julia_sum_loop_index.txt - -tmp_julia_cort.txt: bench_cort.jl - @julia bench_cort.jl > tmp_julia_cort.txt - -tmp_julia_init_zeros.txt: bench_init_zeros.jl - @julia bench_init_zeros.jl > tmp_julia_init_zeros.txt - -tmp_julia_board.txt: bench_board.jl - @julia bench_board.jl > tmp_julia_board.txt + @python bench.py list $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_list.txt + @python bench.py purepy $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_purepy.txt + @python bench.py numpy $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_numpy.txt + @python bench.py _piconumpy_hpy $(NAME_BENCH) | tee 
tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_hpy.txt + @python bench.py _piconumpy_cpython_capi $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_cpy_api.txt +tmp/%_julia.txt: julia/bench_%.jl + @mkdir -p tmp + @julia julia/bench_$*.jl > $@ clean: - rm -f tmp_*.txt + rm -rf tmp -produce_traces: tmp_julia_sum_loop.txt - PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_list.txt pypy bench.py list - PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_piconumpy_list.txt pypy bench.py purepy - PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp_traces_piconumpy_hpy.txt pypy bench.py _piconumpy_hpy +produce_traces: tmp/sum_loop_julia.txt + @mkdir -p tmp + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_list.txt pypy bench.py list + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_piconumpy_list.txt pypy bench.py purepy + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_piconumpy_hpy.txt pypy bench.py _piconumpy_hpy diff --git a/bench/microbench_low_level/README.md b/bench/microbench_low_level/README.md index e017263..f62e564 100644 --- a/bench/microbench_low_level/README.md +++ b/bench/microbench_low_level/README.md @@ -15,4 +15,7 @@ We measure the performance for functions containing low level Python code. - `board` (command `make bench_board`): few indexing, simple float computations with sin/cos and instantiation of a small array. +- `instantiate` (command `make bench_instantiate`): dominated by the + instantiation/deletion of small arrays of 4 floats. + The files result_*.txt contain few results. 
diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index 6e71ba5..14a4dea 100644 --- a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -42,12 +42,14 @@ method = method.replace("_piconumpy_", "piconumpy.") -tmp_result_julia = Path(f"tmp_julia_{name_bench}.txt") +tmp_result_julia = Path(f"tmp/{name_bench}_julia.txt") if tmp_result_julia.exists(): with open(tmp_result_julia) as file: norm = float(file.read()) else: - print(f"{tmp_result_julia} does not exist. First execute with `make`") + raise RuntimeError( + f"{tmp_result_julia} does not exist. First execute with `make`" + ) def sum_loop(arr): @@ -115,6 +117,13 @@ def board(X_0): return array([u0, v0, dU, dV]) +def instantiate(arr): + x = arr[0] + result = array([x, 3 * x, 6 * x, 9 * x]) + result[0] = 2 * result[1] + return result + + compute_from_arr = locals()[name_bench] diff --git a/bench/microbench_low_level/bench_board.jl b/bench/microbench_low_level/julia/bench_board.jl similarity index 100% rename from bench/microbench_low_level/bench_board.jl rename to bench/microbench_low_level/julia/bench_board.jl diff --git a/bench/microbench_low_level/bench_cort.jl b/bench/microbench_low_level/julia/bench_cort.jl similarity index 100% rename from bench/microbench_low_level/bench_cort.jl rename to bench/microbench_low_level/julia/bench_cort.jl diff --git a/bench/microbench_low_level/bench_init_zeros.jl b/bench/microbench_low_level/julia/bench_init_zeros.jl similarity index 100% rename from bench/microbench_low_level/bench_init_zeros.jl rename to bench/microbench_low_level/julia/bench_init_zeros.jl diff --git a/bench/microbench_low_level/julia/bench_instantiate.jl b/bench/microbench_low_level/julia/bench_instantiate.jl new file mode 100644 index 0000000..a71cb63 --- /dev/null +++ b/bench/microbench_low_level/julia/bench_instantiate.jl @@ -0,0 +1,22 @@ +using Statistics + +function instantiate(arr::Array) + x = arr[1] + result = [x, 3*x, 6*x, 9*x] + 
result[1] = 2 * result[2] + return result +end + +compute_from_arr = instantiate + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_low_level/bench_sum_loop.jl b/bench/microbench_low_level/julia/bench_sum_loop.jl similarity index 100% rename from bench/microbench_low_level/bench_sum_loop.jl rename to bench/microbench_low_level/julia/bench_sum_loop.jl diff --git a/bench/microbench_low_level/bench_sum_loop_index.jl b/bench/microbench_low_level/julia/bench_sum_loop_index.jl similarity index 100% rename from bench/microbench_low_level/bench_sum_loop_index.jl rename to bench/microbench_low_level/julia/bench_sum_loop_index.jl diff --git a/bench/microbench_low_level/result_initialize.md b/bench/microbench_low_level/result_initialize.md new file mode 100644 index 0000000..883cea1 --- /dev/null +++ b/bench/microbench_low_level/result_initialize.md @@ -0,0 +1,55 @@ +# Microbenchmark instantiate + +We measure the performance for this function: + +```python +def instantiate(arr): + x = arr[0] + result = array([x, 3 * x, 6 * x, 9 * x]) + result[0] = 2 * result[1] + return result +``` + +One can run the benchmarks with `make bench_instantiate`. 
+ +With PyPy3.7, I get: + +``` +bench instantiate +hostname: meige8pcpa79 +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 1.13e-07 s ( 0.9 * Julia) +piconumpy.purepy : 8.50e-08 s ( 0.7 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 1.69e-06 s ( 13.1 * Julia) +piconumpy.cpython_capi : 1.53e-05 s (118.3 * Julia) +``` + +With CPython: + +``` +bench instantiate +hostname: meige8pcpa79 +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 1.19e-06 s ( 9.2 * Julia) +piconumpy.purepy : 2.59e-06 s ( 20.0 * Julia) +numpy : 3.63e-06 s ( 28.1 * Julia) +piconumpy.hpy : 1.84e-06 s ( 14.3 * Julia) +piconumpy.cpython_capi : 1.35e-06 s ( 10.5 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench instantiate +hostname: meige8pcpa79 +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 4.16e-06 s ( 32.3 * Julia) +piconumpy.purepy : 4.15e-06 s ( 32.2 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 7.32e-06 s ( 56.8 * Julia) +piconumpy.cpython_capi : 9.68e-06 s ( 75.0 * Julia) +``` From 593fa754a3faf38be2f91885d8619919ac9a25b4 Mon Sep 17 00:00:00 2001 From: Pierre Augier Date: Sat, 10 May 2025 00:26:12 +0200 Subject: [PATCH 23/32] Modernize pyproject.toml and Makefile --- Makefile | 14 +++++++++----- pyproject.toml | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 3ed0ded..ef7e508 100644 --- a/Makefile +++ b/Makefile @@ -4,11 +4,18 @@ PYTHON := python endif all: - make develop_universal + make editable_universal ifeq ($(PYTHON),python) - make build_ext + make editable endif +editable: + $(PYTHON) -m pip install -e . + +editable_universal: + $(PYTHON) -m pip install -e . 
--config-settings="--global-option=--hpy-abi=universal" + rm -f piconumpy/_piconumpy_hpy.py + develop: $(PYTHON) setup.py develop @@ -16,9 +23,6 @@ develop_universal: $(PYTHON) setup.py --hpy-abi=universal develop rm -f piconumpy/_piconumpy_hpy.py -pip: - $(PYTHON) -m pip install -e .[dev] - build_ext_universal: $(PYTHON) setup.py --hpy-abi=universal build_ext -if diff --git a/pyproject.toml b/pyproject.toml index 3234fad..52670de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,10 +5,12 @@ description = "An experiment about Numpy and pyhandle/hpy." authors = [ {name = "Pierre Augier", email = "pierre.augier@univ-grenoble-alpes.fr"}, ] -license = {text = "BSD 3-Clause"} +license = "BSD-3-Clause" +license-files = ["LICENSE"] readme = "README.md" keywords = ["numpy", "hpy", "PyPy"] requires-python = ">=3.8" +dependencies = ["hpy>=0.9.0; implementation_name == 'cpython'"] [project.urls] homepage = "https://github.com/paugier/piconumpy" @@ -16,15 +18,17 @@ repository = "https://github.com/paugier/piconumpy" documentation = "https://github.com/paugier/piconumpy" [project.optional-dependencies] -dev = ['transonic', 'numpy', 'pytest', 'pythran'] -full = ['black'] +test = ["pytest", "numpy"] +bench = ['transonic', 'numpy', 'pythran'] +format = ['black'] +full = ["piconumpy[test,bench,format]"] [build-system] requires = [ - "setuptools >= 35.0.2", + "setuptools>=35.0.2", "wheel", "cython", - "hpy >= 0.9.0" + "hpy>=0.9.0; implementation_name == 'cpython'" ] [tool.black] From f0d956f04b29666e37a9d1647fc57085de3ec82a Mon Sep 17 00:00:00 2001 From: Pierre Augier Date: Sun, 11 May 2025 23:03:38 +0200 Subject: [PATCH 24/32] IS_CPY instead of IS_PYPY (for GraalPy) --- Makefile | 1 + bench/bench_cpy_vs_hpy.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index ef7e508..6f5b63f 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ endif editable: $(PYTHON) -m pip install -e . 
+ rm -f piconumpy/_piconumpy_hpy.py editable_universal: $(PYTHON) -m pip install -e . --config-settings="--global-option=--hpy-abi=universal" diff --git a/bench/bench_cpy_vs_hpy.py b/bench/bench_cpy_vs_hpy.py index 9e95fd3..adee1df 100644 --- a/bench/bench_cpy_vs_hpy.py +++ b/bench/bench_cpy_vs_hpy.py @@ -15,7 +15,7 @@ def my_randn(mod, n): return result -IS_PYPY = hasattr(sys, "pypy_version_info") +IS_CPY = sys.implementation.name == "cpython" def runge_kutta_step(mod, f, x0, dt, t=None): @@ -130,7 +130,7 @@ def main(): if norm: print(f" ({t_hpy_univ/norm:4.1f} * Julia)") - if not IS_PYPY: + if IS_CPY: import piconumpy._piconumpy_hpy as pnp_hpy t_hpy_cpy_abi = bench(pnp_hpy, N_SLEDS, N_TIME) @@ -139,7 +139,7 @@ def main(): if norm: print(f" ({t_hpy_cpy_abi/norm:4.1f} * Julia)") - if IS_PYPY: + if not IS_CPY: import piconumpy.purepy as pnp_with_list t_with_list = bench(pnp_with_list, N_SLEDS, N_TIME) From 3523611a0b33a2e8abadaa0dea35fcc2f7f1fc4e Mon Sep 17 00:00:00 2001 From: Pierre Augier Date: Tue, 13 May 2025 07:43:19 +0200 Subject: [PATCH 25/32] microbench: element_wise --- bench/microbench_low_level/Makefile | 2 ++ bench/microbench_low_level/bench.py | 19 ++++++++++++ .../julia/bench_element_wise.jl | 30 +++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 bench/microbench_low_level/julia/bench_element_wise.jl diff --git a/bench/microbench_low_level/Makefile b/bench/microbench_low_level/Makefile index 1734cbe..5f874dd 100644 --- a/bench/microbench_low_level/Makefile +++ b/bench/microbench_low_level/Makefile @@ -21,6 +21,8 @@ bench_board: tmp/board_julia.txt _bench bench_instantiate: NAME_BENCH=instantiate bench_instantiate: tmp/instantiate_julia.txt _bench +bench_element_wise: NAME_BENCH=element_wise +bench_element_wise: tmp/element_wise_julia.txt _bench _bench: @echo bench $(NAME_BENCH) diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index 14a4dea..dcc56d1 100644 --- 
a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -22,6 +22,9 @@ array = ext.array elif method == "list": array = list + if name_bench == "element_wise": + sys.exit(0) + elif method == "numpy": try: @@ -124,6 +127,22 @@ def instantiate(arr): return result +def element_wise(arr): + + dt = 0.1 + x0 = arr + + k1 = x0 * dt + k2 = (x0 + k1 / 2) * dt + k3 = (x0 + k2 / 2) * dt + k4 = (x0 + k3) * dt + # workaround for a pypy bug + # see https://foss.heptapod.net/pypy/pypy/-/issues/3509 + # x_new = x0 + (k1 + 2 * k2 + 2 * k3 + k4) / 6 + x_new = x0 + (k1 + k2 * 2 + k3 * 2 + k4) / 6 + return x_new + + compute_from_arr = locals()[name_bench] diff --git a/bench/microbench_low_level/julia/bench_element_wise.jl b/bench/microbench_low_level/julia/bench_element_wise.jl new file mode 100644 index 0000000..b1e0bd6 --- /dev/null +++ b/bench/microbench_low_level/julia/bench_element_wise.jl @@ -0,0 +1,30 @@ +using Statistics + +function element_wise(arr::Array) + + dt = 0.1 + x0 = arr + + k1 = x0 * dt + k2 = (x0 + k1 / 2) * dt + k3 = (x0 + k2 / 2) * dt + k4 = (x0 + k3) * dt + x_new = x0 + (k1 + 2 * k2 + 2 * k3 + k4) / 6 + + return x_new + +end + +compute_from_arr = element_wise + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) \ No newline at end of file From ad9dfc1e3a2197ad56e7c79cf011ba92cb0c0abe Mon Sep 17 00:00:00 2001 From: Pierre Augier Date: Tue, 13 May 2025 07:43:56 +0200 Subject: [PATCH 26/32] Various compatibility improvements --- README.md | 2 +- bench/bench_array1d.py | 21 +++++++++++++-------- bench/make_bench_piconumpy.py | 7 ++++--- pyproject.toml | 3 ++- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 6b2a908..75a5e80 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ issue for the future of scientific Python (see [1], [2], [HPy]). 
[2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html [HPy]: https://github.com/hpyproject/hpy -[HPy] is a very ambitious and promissing project to design a new and better C +[HPy] is a very ambitious and promising project to design a new and better C API for interacting with Python interpreters. It should allow people to write Python extensions efficient on different interpreters (CPython, PyPy, Jython, IronPython, GraalPython, RustPython, etc.). diff --git a/bench/bench_array1d.py b/bench/bench_array1d.py index a73a635..ba4426f 100644 --- a/bench/bench_array1d.py +++ b/bench/bench_array1d.py @@ -1,9 +1,14 @@ +import sys + import numpy as np from numpy import array from math import pi, cos, sin -from transonic import jit +from transonic import jit, wait_for_all_extensions + +IS_CPY = sys.implementation.name == "cpython" +IS_PYPY = sys.implementation.name == "pypy" # begin code functions (don't remove this line) @@ -75,15 +80,15 @@ def bench(n_sleds, n_time): # end code functions (don't remove this line) +if IS_CPY or IS_PYPY: -bench_pythran = jit(bench) -# Numba does not support this code... -# bench_numba = jit(backend="numba")(bench) -from transonic import wait_for_all_extensions + bench_pythran = jit(bench) + # Numba does not support this code... 
+ # bench_numba = jit(backend="numba")(bench) -# warmup (compilation of the Pythran extension) -bench_pythran(1, 1) -wait_for_all_extensions() + # warmup (compilation of the Pythran extension) + bench_pythran(1, 1) + wait_for_all_extensions() if __name__ == "__main__": diff --git a/bench/make_bench_piconumpy.py b/bench/make_bench_piconumpy.py index 4a76e9a..eb54d0e 100644 --- a/bench/make_bench_piconumpy.py +++ b/bench/make_bench_piconumpy.py @@ -49,7 +49,8 @@ def create_tmp_file(name_module): from math import pi, cos, sin from pprint import pprint -IS_PYPY = hasattr(sys, 'pypy_version_info') +IS_CPY = sys.implementation.name == "cpython" + """ + code_functions + """ @@ -63,7 +64,7 @@ def create_tmp_file(name_module): from tmp_purepy_array import bench as bench_piconumpy_purepy_array from tmp_cython import bench as bench_cython -if not IS_PYPY: +if IS_CPY: from tmp_hpy import bench as bench_hpy pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")}) @@ -95,7 +96,7 @@ def timeit(name_func, name, total_duration=2): ) timeit("bench", name="PicoNumpy (CPython C-API)") -if not IS_PYPY: +if IS_CPY: timeit("bench_hpy", name="PicoNumpy (HPy CPy ABI)") timeit("bench_hpy_universal", name="PicoNumpy (HPy Universal)") timeit("bench_pythran", name="Transonic-Pythran") diff --git a/pyproject.toml b/pyproject.toml index 52670de..32f4dd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,8 +20,9 @@ documentation = "https://github.com/paugier/piconumpy" [project.optional-dependencies] test = ["pytest", "numpy"] bench = ['transonic', 'numpy', 'pythran'] +profile = ["gprof2dot"] format = ['black'] -full = ["piconumpy[test,bench,format]"] +full = ["piconumpy[test,bench,profile,format]"] [build-system] requires = [ From 442fdb72bd748b35b252636497381a9785233c43 Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 13 May 2025 10:54:03 +0200 Subject: [PATCH 27/32] makefile: compat PyPy and GraalPy --- Makefile | 45 
++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 6f5b63f..0b5cae2 100644 --- a/Makefile +++ b/Makefile @@ -1,38 +1,51 @@ ifeq ($(PYTHON),) -PYTHON := python +PYTHON := python3 endif +IMPLEMENTATION := $(shell $(PYTHON) -c "import sys; print(sys.implementation.name)") + + all: make editable_universal -ifeq ($(PYTHON),python) +ifeq ($(IMPLEMENTATION),cpython) make editable endif + +rm_hpy_py: + rm -f piconumpy/_piconumpy_hpy.py + editable: $(PYTHON) -m pip install -e . - rm -f piconumpy/_piconumpy_hpy.py + make rm_hpy_py editable_universal: $(PYTHON) -m pip install -e . --config-settings="--global-option=--hpy-abi=universal" - rm -f piconumpy/_piconumpy_hpy.py + make rm_hpy_py + +editable_full: + $(PYTHON) -m pip install -e .[full] + make rm_hpy_py + +# deprecated but let's keep them develop: $(PYTHON) setup.py develop + make rm_hpy_py develop_universal: $(PYTHON) setup.py --hpy-abi=universal develop - rm -f piconumpy/_piconumpy_hpy.py + make rm_hpy_py build_ext_universal: $(PYTHON) setup.py --hpy-abi=universal build_ext -if + make rm_hpy_py build_ext: $(PYTHON) setup.py build_ext -if - rm -f piconumpy/_piconumpy_hpy.py + make rm_hpy_py -full: - $(PYTHON) -m pip install -e .[full] format: black -l 82 setup.py piconumpy/*.py @@ -46,4 +59,18 @@ clean: rm -rf build dist piconumpy.egg-info black: - black -l 82 . \ No newline at end of file + black -l 82 . 
+ + +install_pypy: + uv python install pypy + +install_graalpy: + uv python install graalpy + +create_venv_pypy: + $(shell uv python find pypy) -m venv .venv_pypy --upgrade-deps + +create_venv_graalpy: + # cannot use --upgrade-deps because pip is patched for GraalPy + $(shell uv python find graalpy) -m venv .venv_graalpy From 89ff2149ac2cf2e343b51616243b72044a9d8336 Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 13 May 2025 17:11:39 +0200 Subject: [PATCH 28/32] Add .mdformat.toml --- .mdformat.toml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .mdformat.toml diff --git a/.mdformat.toml b/.mdformat.toml new file mode 100644 index 0000000..972483a --- /dev/null +++ b/.mdformat.toml @@ -0,0 +1,3 @@ +wrap = 89 +number = true +end_of_line = "lf" From adfc2f44a4282f77738eed09dd1bea7050c4321a Mon Sep 17 00:00:00 2001 From: paugier Date: Tue, 13 May 2025 17:32:21 +0200 Subject: [PATCH 29/32] Improve README and bench --- Makefile | 3 + README.md | 164 ++++++++++++++++++++-------------- bench/Makefile | 2 +- bench/bench_cpy_vs_hpy.py | 7 +- bench/make_bench_piconumpy.py | 17 ++-- piconumpy/bench.py | 15 ++-- 6 files changed, 124 insertions(+), 84 deletions(-) diff --git a/Makefile b/Makefile index 0b5cae2..d360199 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,9 @@ install_pypy: install_graalpy: uv python install graalpy +create_venv_cpy: + $(PYTHON) -m venv .venv_cpy --upgrade-deps + create_venv_pypy: $(shell uv python find pypy) -m venv .venv_pypy --upgrade-deps diff --git a/README.md b/README.md index 75a5e80..57202b8 100644 --- a/README.md +++ b/README.md @@ -5,42 +5,34 @@ **An experiment about Numpy and HPy** The C API of CPython is one of the cause of the success of Python in scientific -computing. In particular, Numpy (and all the Python scientific stack) is built -on top of this API. However, some characteristics of this API start to be an -issue for the future of scientific Python (see [1], [2], [HPy]). +computing. 
In particular, Numpy (and all the Python scientific stack) is built on top of +this API. However, some characteristics of this API start to be an issue for the future +of scientific Python (see [1], [2], [HPy]). -[1]: https://faster-cpython.readthedocs.io/ -[2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html -[HPy]: https://github.com/hpyproject/hpy - -[HPy] is a very ambitious and promising project to design a new and better C -API for interacting with Python interpreters. It should allow people to write -Python extensions efficient on different interpreters (CPython, PyPy, Jython, -IronPython, GraalPython, RustPython, etc.). - -PyPy would be especially useful for some scientific applications. For example -for Integration and ODEs -([scipy.integrate](https://docs.scipy.org/doc/scipy/reference/integrate.html)), -for which there are a lot of callbacks of very small functions. This repository -contains [a tiny benchmark](bench/without_numpy) showing that as long as Numpy -is not used, PyPy is very efficient for such task. Unfortunately, as soon as -Numpy is used, PyPy becomes very slow! +[HPy] is a very ambitious and promising project to design a new and better C API for +interacting with Python interpreters. It should allow people to write Python extensions +efficient on different interpreters (CPython, PyPy, Jython, IronPython, GraalPython, +RustPython, etc.). -[bench/without_numpy]: https://github.com/paugier/piconumpy/blob/master/bench/without_numpy/ +PyPy would be especially useful for some scientific applications. For example for +Integration and ODEs +([scipy.integrate](https://docs.scipy.org/doc/scipy/reference/integrate.html)), for which +there are a lot of callbacks of very small functions. This repository contains +[a tiny benchmark](bench/without_numpy) showing that as long as Numpy is not used, PyPy +is very efficient for such task. Unfortunately, as soon as Numpy is used, PyPy becomes +very slow! 
-With PicoNumpy, I'd like to study if [HPy] could help for codes using Numpy and -callbacks of small Python functions. +With PicoNumpy, I'd like to study if [HPy] could help for codes using Numpy and callbacks +of small Python functions. -We start by a [simple but realistic benchmark](bench/bench_array1d.py) (the -slow loops only involve pure-Python and very simple Numpy). We then wrote a -tiny ("pico") implementation of a Numpy like object (just sufficient to run the -benchmark). +We start by a [simple but realistic benchmark](bench/bench_array1d.py) (the slow loops +only involve pure-Python and very simple Numpy). We then wrote a tiny ("pico") +implementation of a Numpy like object (just sufficient to run the benchmark). -The next task is to reimplement PicoNumpy using [HPy] and to check if PyPy -could efficiently accelerate [our main benchmark](bench/bench_array1d.py). +The next task is to reimplement PicoNumpy using [HPy] and to check if PyPy could +efficiently accelerate [our main benchmark](bench/bench_array1d.py). -PicoNumpy is really tiny. It just provides an `array` class (one-dimensional) -supporting: +PicoNumpy is really tiny. It just provides an `array` class (one-dimensional) supporting: - Instantiation from a list of floats - Elementwise multiplication and division by a float @@ -48,29 +40,25 @@ supporting: - Indexing - `len` -A good acceleration by PyPy of our example would be a great proof that the -scientific Python community has to invest time and energy on [HPy]. +A good acceleration by PyPy of our example would be a great proof that the scientific +Python community has to invest time and energy on [HPy]. -In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for -the benchmark and comparison. With Transonic-Pythran, we typically get a 50 -speedup compared to CPython (and ~400 versus PyPy, which is still very slow for -such codes using Numpy). 
- -[bench/bench_array1d.py]: https://github.com/paugier/piconumpy/blob/master/bench/bench_array1d.py +In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for the +benchmark and comparison. With Transonic-Pythran, we typically get a 50 speedup compared +to CPython (and ~400 versus PyPy, which is still very slow for such codes using Numpy). ## Install and run the benchmarks -**Warning:** PicoNumpy now depends on HPy, which still has to be installed from -the [Git repository](https://github.com/hpyproject/hpy). For now, the -installation is a bit more complex that what is described here (more about this +**Warning:** PicoNumpy depends on HPy >=0.9.0. For now, the installation is a bit more +complex that what is described here (more about this [here](#more-precise-notes-on-how-to-install-and-run-the-benchmarks-with-PyPy)). -`make` should install the package in editable mode. `cd bench; make` should run -the benchmarks. For the benchmarks, Julia is used for a good comparison point -so the command `julia` has to be available. +`make` should install the package in editable mode. `cd bench; make` should run the +benchmarks. For the benchmarks, Julia is used for a good comparison point so the command +`julia` has to be available. 
-For PyPy, the Makefiles are sensible to the environment variable `PYTHON`, so -you could do: +For PyPy, the Makefiles are sensible to the environment variable `PYTHON`, so you could +do: ```bash export PYTHON=pypy3 @@ -79,8 +67,8 @@ cd bench make ``` -The benchmark code can be profiled for the different implementations with the -commands (you need gprof2dot and graphviz): +The benchmark code can be profiled for the different implementations with the commands +(you need gprof2dot and graphviz): ```bash cd bench @@ -90,48 +78,82 @@ make profile METHOD="purepy" make profile METHOD="cython" ``` -### More precise notes on how to install and run the benchmarks with PyPy +### Notes on how to install and run the benchmarks with PyPy -Download and extract a nightly PyPy build -. Add to the `PATH` environment variable -the path of the directory containing the `pypy` executable (something like -`~/opt/pypy-c-jit-101190-b661dc329618-linux64/bin`). Then, you should be able -to run: +PyPy can be downloaded with UV or manually (for example from + for a nightly build). -```bash -pypy -m ensurepip -pypy -m pip install pip -U -pypy -m pip install numpy cython pytest transonic pythran +With UV, one can run + +```sh +uv python install pypy ``` -One can check which HPy version is vendored with PyPy: +and then get the path towards `pypy` executable with: -```bash -pypy -c "import hpy.universal as u; print(u.get_version())" +```sh +uv python find pypy ``` -gives `('0.0.3', '2196f14')`. +which can give something like +`~/.local/share/uv/python/pypy-3.11.11-linux-x86_64-gnu/bin/pypy`. -Now we can build-install PicoNumpy: +Then, you should be able to create a virtual environment, activate it and build-install +PicoNumpy with ```bash -cd ~/Dev/piconumpy -pypy setup.py --hpy-abi=universal develop +cd ~/dev/piconumpy +~/.local/share/uv/python/pypy-3.11.11-linux-x86_64-gnu/bin/pypy -m venv .venv_pypy --upgrade-deps +. 
.venv_pypy/bin/activate +pip install -e .[full] ``` -And run the benchmarks with: +and run the benchmarks with: ```bash -export PYTHON="pypy" +cd bench make clean make bench_hpy make ``` +Note that one can check which HPy version is vendored with PyPy: + +```bash +python -c "import hpy.universal as u; print(u.get_version())" +``` + +### Notes on how to install and run the benchmarks with GraalPy + +GraalPy can be downloaded with UV with + +```sh +uv python install graalpy +``` + +Then, one can run + +```sh +cd ~/dev/piconumpy +# cannot use --upgrade-deps because pip is patched for GraalPy +$(uv python find graalpy) -m venv .venv_graalpy +. .venv_graalpy/bin/activate +# we don't try to run the full benchmarks using Pythran on GraalPy +pip install -e .[test,profile] +``` + +and run the benchmarks with: + +```bash +cd bench +make clean +make bench_hpy +``` + ## Few results -As of today (12 October 2021), HPy is not yet ready for high performance, but at -least (with HPy 0.0.3) it runs ! +As of today (12 October 2021), HPy is not yet ready for high performance, but at least +(with HPy 0.0.3) it runs ! 
### At home (Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz) @@ -183,3 +205,7 @@ CPython C-API: 0.592 seconds (34.6 * Julia) HPy [Universal]: 0.207 seconds (12.1 * Julia) Python list: 0.093 seconds ( 5.4 * Julia) ``` + +[1]: https://faster-cpython.readthedocs.io/ +[2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html +[hpy]: https://github.com/hpyproject/hpy diff --git a/bench/Makefile b/bench/Makefile index 7da6e64..59359f6 100644 --- a/bench/Makefile +++ b/bench/Makefile @@ -7,7 +7,7 @@ ifeq ($(METHOD),) METHOD := cpython-c-api endif -all: tmp.py tmp_result_julia.txt +bench_full: tmp.py tmp_result_julia.txt $(PYTHON) tmp.py tmp.py: bench_array1d.py make_bench_piconumpy.py diff --git a/bench/bench_cpy_vs_hpy.py b/bench/bench_cpy_vs_hpy.py index adee1df..1bb35dd 100644 --- a/bench/bench_cpy_vs_hpy.py +++ b/bench/bench_cpy_vs_hpy.py @@ -1,9 +1,11 @@ -import sys -from time import perf_counter import random +import socket +import sys + from math import pi, cos, sin from pathlib import Path from pprint import pprint +from time import perf_counter here = Path(__file__).absolute().parent @@ -107,6 +109,7 @@ def main(): import piconumpy._piconumpy_cpython_capi as pnp_capi pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")}) + print(f"hostname: {socket.gethostname()}") tmp_result_julia = Path("tmp_result_julia.txt") if tmp_result_julia.exists(): diff --git a/bench/make_bench_piconumpy.py b/bench/make_bench_piconumpy.py index eb54d0e..4f92bcc 100644 --- a/bench/make_bench_piconumpy.py +++ b/bench/make_bench_piconumpy.py @@ -43,12 +43,17 @@ def create_tmp_file(name_module): code = ( """ +import socket import sys -import numpy as np -from piconumpy import array + from math import pi, cos, sin +from pathlib import Path from pprint import pprint +import numpy as np + +from piconumpy import array + IS_CPY = sys.implementation.name == "cpython" """ @@ -68,10 +73,12 @@ def create_tmp_file(name_module): from tmp_hpy import bench as 
bench_hpy pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")}) - +print(f"hostname: {socket.gethostname()}") # get norm from Julia benchmark -with open("tmp_result_julia.txt") as file: - norm = float(file.read()) + +path_julia_result = Path("tmp_result_julia.txt") +assert path_julia_result.exists() +norm = float(path_julia_result.read_text()) max_length_name = len("piconumpy (CPython C-API)") + 2 diff --git a/piconumpy/bench.py b/piconumpy/bench.py index a704e5f..a277d4a 100644 --- a/piconumpy/bench.py +++ b/piconumpy/bench.py @@ -11,6 +11,13 @@ def timeit_verbose( print_time=False, max_length_name=33, ): + if name is None: + name = stmt.split("(")[0] + + fmt_name = f"{{:{max_length_name}s}}" + name = fmt_name.format(name) + print(f"{name}:", end="", flush=True) + result = timeit( stmt, setup=setup, total_duration=total_duration, globals=globals ) @@ -20,18 +27,12 @@ def timeit_verbose( else: norm_given = True - if name is None: - name = stmt.split("(")[0] - - fmt_name = f"{{:{max_length_name}s}}" - name = fmt_name.format(name) - if print_time: raw_time = f" = {result:7.3g} s" else: raw_time = "" - print(f"{name}: {result/norm:5.3g} * norm{raw_time}") + print(f"\r{name}: {result/norm:5.3g} * norm{raw_time}") if not norm_given and not print_time: print(f"norm = {norm:5.3g} s") From 4d53bc593ff69b7a7a77ed7f73b04254f59b0b42 Mon Sep 17 00:00:00 2001 From: Pierre Augier Date: Wed, 14 May 2025 06:48:05 +0200 Subject: [PATCH 30/32] Fix microbench_low_level to understand bench results --- Makefile | 2 +- README.md | 161 ++++++++++++------ bench/Makefile | 9 +- bench/microbench_low_level/bench.py | 13 +- .../microbench_low_level/julia/bench_board.jl | 2 +- .../julia/bench_element_wise.jl | 4 +- .../julia/bench_init_zeros.jl | 2 +- .../julia/bench_instantiate.jl | 2 +- ...lt_initialize.md => result_instantiate.md} | 0 9 files changed, 130 insertions(+), 65 deletions(-) rename bench/microbench_low_level/{result_initialize.md => 
result_instantiate.md} (100%) diff --git a/Makefile b/Makefile index d360199..92877da 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ format: black -l 82 setup.py piconumpy/*.py clang-format-7 -i piconumpy/*cpython_capi.c -tests: +tests: rm_hpy_py $(PYTHON) -m pytest piconumpy -s clean: diff --git a/README.md b/README.md index 57202b8..f473ec5 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,8 @@ efficiently accelerate [our main benchmark](bench/bench_array1d.py). PicoNumpy is really tiny. It just provides an `array` class (one-dimensional) supporting: - Instantiation from a list of floats -- Elementwise multiplication and division by a float -- Elementwise addition (of 2 arrays) +- Element-wise multiplication and division by a float +- Element-wise addition (of 2 arrays) - Indexing - `len` @@ -44,31 +44,26 @@ A good acceleration by PyPy of our example would be a great proof that the scien Python community has to invest time and energy on [HPy]. In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for the -benchmark and comparison. With Transonic-Pythran, we typically get a 50 speedup compared +benchmark and comparison. With Transonic-Pythran, we typically get a 50 speed-up compared to CPython (and ~400 versus PyPy, which is still very slow for such codes using Numpy). ## Install and run the benchmarks -**Warning:** PicoNumpy depends on HPy >=0.9.0. For now, the installation is a bit more -complex that what is described here (more about this -[here](#more-precise-notes-on-how-to-install-and-run-the-benchmarks-with-PyPy)). +`pip install -e .[full]` should build and install the package in editable mode and all +dependencies necessary for testing, benchmarking and profiling. -`make` should install the package in editable mode. `cd bench; make` should run the -benchmarks. For the benchmarks, Julia is used for a good comparison point so the command -`julia` has to be available. 
+For the benchmarks, Julia is used for a good comparison point so the command `julia` has +to be available. Different benchmarks can be run with -For PyPy, the Makefiles are sensible to the environment variable `PYTHON`, so you could -do: - -```bash -export PYTHON=pypy3 -make +```sh cd bench -make +make clean +make bench_hpy +make bench_full ``` -The benchmark code can be profiled for the different implementations with the commands -(you need gprof2dot and graphviz): +The benchmark code can be profiled for the different piconumpy implementations with the +commands (you need gprof2dot and graphviz): ```bash cd bench @@ -78,7 +73,7 @@ make profile METHOD="purepy" make profile METHOD="cython" ``` -### Notes on how to install and run the benchmarks with PyPy +### Notes on PyPy PyPy can be downloaded with UV or manually (for example from for a nightly build). @@ -103,7 +98,7 @@ PicoNumpy with ```bash cd ~/dev/piconumpy -~/.local/share/uv/python/pypy-3.11.11-linux-x86_64-gnu/bin/pypy -m venv .venv_pypy --upgrade-deps +$(uv python find pypy) -m venv .venv_pypy --upgrade-deps . .venv_pypy/bin/activate pip install -e .[full] ``` @@ -114,7 +109,7 @@ and run the benchmarks with: cd bench make clean make bench_hpy -make +make bench_full ``` Note that one can check which HPy version is vendored with PyPy: @@ -123,7 +118,7 @@ Note that one can check which HPy version is vendored with PyPy: python -c "import hpy.universal as u; print(u.get_version())" ``` -### Notes on how to install and run the benchmarks with GraalPy +### Notes on GraalPy GraalPy can be downloaded with UV with @@ -152,58 +147,116 @@ make bench_hpy ## Few results -As of today (12 October 2021), HPy is not yet ready for high performance, but at least -(with HPy 0.0.3) it runs ! 
- -### At home (Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz) +### Full benchmarks - With CPython ``` -Julia : 1 * norm = 0.0171 s -PicoNumpy (CPython C-API) : 11.1 * norm -PicoNumpy (HPy CPy ABI) : 11.6 * norm -PicoNumpy (HPy Universal) : 12.1 * norm -Transonic-Pythran : 0.537 * norm -Numpy : 33.8 * norm -PicoNumpy (purepy) : 43.7 * norm -PicoNumpy (purepy_array) : 44.8 * norm -PicoNumpy (Cython) : 33.9 * norm +{'cache_tag': 'cpython-311', + 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia : 1 * norm = 0.0129 s +PicoNumpy (CPython C-API) : 6.55 * norm +PicoNumpy (HPy CPy ABI) : 7.46 * norm +PicoNumpy (HPy Universal) : 7.92 * norm +Transonic-Pythran : 0.581 * norm +Numpy : 27.1 * norm +PicoNumpy (purepy) : 18.8 * norm +PicoNumpy (purepy_array) : 31.7 * norm +PicoNumpy (Cython) : 23.3 * norm ``` - With PyPy3 ``` -Julia : 1 * norm = 0.0171 s -PicoNumpy (CPython C-API) : 39.2 * norm -PicoNumpy (HPy Universal) : 13.1 * norm -Transonic-Pythran : 0.562 * norm -Numpy : 286 * norm -PicoNumpy (purepy) : 5.59 * norm -PicoNumpy (purepy_array) : 7.41 * norm -PicoNumpy (Cython) : 282 * norm +{'cache_tag': 'pypy311', + 'version': sys.pypy_version_info(major=7, minor=3, micro=19, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia : 1 * norm = 0.0129 s +PicoNumpy (CPython C-API) : 35.5 * norm +PicoNumpy (HPy Universal) : 44.7 * norm +Transonic-Pythran : 0.609 * norm +Numpy : 168 * norm +PicoNumpy (purepy) : 2.98 * norm +PicoNumpy (purepy_array) : 8.7 * norm +PicoNumpy (Cython) : 288 * norm +``` + +Discussion: PyPy with HPy universal is really too slow (44.7x slower than Julia, 6x slower than +CPython with its C-API and even a bit slower than PyPy with cpyext!). This is a big issue +for HPy! + +A reasonable target would be as fast as CPython with its C-API...
+ +Profiling shows that the issue is related to slow element-wise operations as in the micro-benchmark + +```sh +cd microbench_low_level +make bench_element_wise +``` + +- With CPython + +```sh +bench element_wise +hostname: meige7ltpa212 +{'cache_tag': 'cpython-311', + 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)} +piconumpy.purepy : 7.88e-06 s ( 21.9 * Julia) +numpy : 7.88e-06 s ( 21.9 * Julia) +piconumpy.hpy (universal) : 1.34e-06 s ( 3.7 * Julia) +piconumpy.cpython_capi : 6.12e-07 s ( 1.7 * Julia) +``` + +- With PyPy3 + +```sh +bench element_wise +hostname: meige7ltpa212 +{'cache_tag': 'pypy311', + 'version': sys.pypy_version_info(major=7, minor=3, micro=19, releaselevel='final', serial=0)} +piconumpy.purepy : 1.46e-06 s ( 4.1 * Julia) +numpy : 4.39e-05 s (121.9 * Julia) +piconumpy.hpy (universal) : 4.27e-06 s ( 11.9 * Julia) +piconumpy.cpython_capi : 1.84e-06 s ( 5.1 * Julia) ``` -#### Simpler benchmarks (bench/bench_cpy_vs_hpy.py) +### Simpler benchmarks (bench/bench_cpy_vs_hpy.py) - With CPython ``` -{'cache_tag': 'cpython-39', - 'version': sys.version_info(major=3, minor=9, micro=6, releaselevel='final', serial=0)} -CPython C-API: 0.193 seconds (11.2 * Julia) -HPy [Universal]: 0.208 seconds (12.1 * Julia) -HPy [CPy ABI]: 0.201 seconds (11.7 * Julia) +{'cache_tag': 'cpython-311', + 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia: 0.013 seconds +CPython C-API: 0.084 seconds ( 6.5 * Julia) +HPy [Universal]: 0.102 seconds ( 7.9 * Julia) +HPy [CPy ABI]: 0.096 seconds ( 7.4 * Julia) ``` - With PyPy3 ``` -{'cache_tag': 'pypy37', - 'version': sys.pypy_version_info(major=7, minor=3, micro=6, releaselevel='final', serial=0)} -CPython C-API: 0.592 seconds (34.6 * Julia) -HPy [Universal]: 0.207 seconds (12.1 * Julia) -Python list: 0.093 seconds ( 5.4 * Julia) +{'cache_tag': 'pypy311', + 'version': sys.pypy_version_info(major=7, minor=3, micro=19, 
releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia: 0.013 seconds +CPython C-API: 0.382 seconds (29.6 * Julia) +HPy [Universal]: 0.487 seconds (37.6 * Julia) +Python list: 0.037 seconds ( 2.9 * Julia) +``` + +- GraalPy + +``` +{'cache_tag': 'graalpy242-311', + 'version': sys.version_info(major=3, minor=11, micro=7, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia: 0.013 seconds +CPython C-API: 2.123 seconds (164.2 * Julia) +HPy [Universal]: 1.541 seconds (119.2 * Julia) +Python list: 0.542 seconds (41.9 * Julia) ``` [1]: https://faster-cpython.readthedocs.io/ diff --git a/bench/Makefile b/bench/Makefile index 59359f6..eb4c4d4 100644 --- a/bench/Makefile +++ b/bench/Makefile @@ -7,7 +7,7 @@ ifeq ($(METHOD),) METHOD := cpython-c-api endif -bench_full: tmp.py tmp_result_julia.txt +bench_full: rm_hpy_py tmp.py tmp_result_julia.txt $(PYTHON) tmp.py tmp.py: bench_array1d.py make_bench_piconumpy.py @@ -20,11 +20,14 @@ clean: tmp_result_julia.txt: julia bench.jl > tmp_result_julia.txt -profile: tmp.py +profile: rm_hpy_py tmp.py $(PYTHON) profile_piconumpy.py $(METHOD) # with gprof2dot and graphviz (command dot) gprof2dot -f pstats tmp.pstats | dot -Tpng -o tmp_$(METHOD).png eog tmp_$(METHOD).png -bench_hpy: +bench_hpy: rm_hpy_py $(PYTHON) bench_cpy_vs_hpy.py + +rm_hpy_py: + rm -f ../piconumpy/_piconumpy_hpy.py diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py index dcc56d1..d9d3a8a 100644 --- a/bench/microbench_low_level/bench.py +++ b/bench/microbench_low_level/bench.py @@ -14,6 +14,10 @@ except IndexError: name_bench = "sum_loop" +try: + size = sys.argv[3] +except IndexError: + size = None if method == "_piconumpy_hpy": from piconumpy.util_hpy import import_ext @@ -44,6 +48,8 @@ if "_piconumpy_" in method: method = method.replace("_piconumpy_", "piconumpy.") +if method.endswith("hpy"): + method += " (universal)" tmp_result_julia = Path(f"tmp/{name_bench}_julia.txt") if tmp_result_julia.exists(): @@ -145,8 
+151,11 @@ def element_wise(arr): compute_from_arr = locals()[name_bench] - -size = 10000 +if size is None: + if method.startswith("sum_loop") or method == "cort": + size = 10000 + else: + size = 4 print(f"{method:30s}:", end="", flush=True) diff --git a/bench/microbench_low_level/julia/bench_board.jl b/bench/microbench_low_level/julia/bench_board.jl index 63187b1..69d8b64 100644 --- a/bench/microbench_low_level/julia/bench_board.jl +++ b/bench/microbench_low_level/julia/bench_board.jl @@ -31,7 +31,7 @@ end compute_from_arr = board -size = 10000 +size = 4 nb_runs = 200 times = zeros(nb_runs) diff --git a/bench/microbench_low_level/julia/bench_element_wise.jl b/bench/microbench_low_level/julia/bench_element_wise.jl index b1e0bd6..107d3b8 100644 --- a/bench/microbench_low_level/julia/bench_element_wise.jl +++ b/bench/microbench_low_level/julia/bench_element_wise.jl @@ -17,8 +17,8 @@ end compute_from_arr = element_wise -size = 10000 -nb_runs = 200 +size = 4 +nb_runs = 2000 times = zeros(nb_runs) diff --git a/bench/microbench_low_level/julia/bench_init_zeros.jl b/bench/microbench_low_level/julia/bench_init_zeros.jl index b6035e5..4ac2656 100644 --- a/bench/microbench_low_level/julia/bench_init_zeros.jl +++ b/bench/microbench_low_level/julia/bench_init_zeros.jl @@ -8,7 +8,7 @@ end compute_from_arr = init_zeros -size = 10000 +size = 4 nb_runs = 200 times = zeros(nb_runs) diff --git a/bench/microbench_low_level/julia/bench_instantiate.jl b/bench/microbench_low_level/julia/bench_instantiate.jl index a71cb63..5116e07 100644 --- a/bench/microbench_low_level/julia/bench_instantiate.jl +++ b/bench/microbench_low_level/julia/bench_instantiate.jl @@ -9,7 +9,7 @@ end compute_from_arr = instantiate -size = 10000 +size = 4 nb_runs = 200 times = zeros(nb_runs) diff --git a/bench/microbench_low_level/result_initialize.md b/bench/microbench_low_level/result_instantiate.md similarity index 100% rename from bench/microbench_low_level/result_initialize.md rename to 
bench/microbench_low_level/result_instantiate.md From b4a99aac9f0cf846f72343082b398ea88bf3e10f Mon Sep 17 00:00:00 2001 From: paugier Date: Wed, 14 May 2025 11:27:34 +0200 Subject: [PATCH 31/32] pythran 0.18 from GitHub --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 32f4dd9..ff6b793 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,9 @@ documentation = "https://github.com/paugier/piconumpy" [project.optional-dependencies] test = ["pytest", "numpy"] -bench = ['transonic', 'numpy', 'pythran'] +# pythran 0.18.0 needed but not yet on PyPI +# (see https://github.com/serge-sans-paille/pythran/pull/2310#issuecomment-2871805768) +bench = ['transonic', 'numpy', 'pythran@git+https://github.com/serge-sans-paille/pythran.git@0.18.0'] profile = ["gprof2dot"] format = ['black'] full = ["piconumpy[test,bench,profile,format]"] From 010d2ad6d688f850fdabff93032b5796e00710d1 Mon Sep 17 00:00:00 2001 From: paugier Date: Wed, 14 May 2025 11:28:20 +0200 Subject: [PATCH 32/32] Update GitHub Actions ci --- .github/workflows/tests.yml | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a824647..e24b666 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,39 +8,32 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: ['3.8', '3.9', '3.10', 'pypy-3.7-nightly'] + python-version: ['3.11', '3.12', 'pypy-3.11'] steps: - name: Setup Julia - uses: julia-actions/setup-julia@v1 + uses: julia-actions/setup-julia@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - if: startsWith(matrix.python-version, 'pypy') != true - name: Install HPy (only for CPython) - run: | - # git clone -b release/0.0.3 --single-branch https://github.com/hpyproject/hpy - 
# cd hpy - # pip install . - pip install hpy>=0.9.0rc1 + - name: Checkout + uses: actions/checkout@v4 - - name: Install dependencies + - name: Build and install deps run: | - pip install numpy cython pytest transonic pythran + pip install -e .[full] - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 + - if: startsWith(matrix.python-version, 'pypy') != true + name: Build universal extension (only needed for CPython) + run: | + pip install -e . --config-settings="--global-option=--hpy-abi=universal" - - name: build + - name: Remove _piconumpy_hpy.py run: | - python setup.py develop - python setup.py --hpy-abi=universal develop rm -f piconumpy/_piconumpy_hpy.py - name: Run tests @@ -52,6 +45,6 @@ jobs: cd bench make tmp_result_julia.txt make bench_hpy - make - # let's rerun bench_hpy to get these results also at the end + make bench_full + # rerun bench_hpy to get these results also at the end make bench_hpy