diff --git a/fesod-benchmark/benchmark.md b/fesod-benchmark/benchmark.md new file mode 100644 index 000000000..678ffee0e --- /dev/null +++ b/fesod-benchmark/benchmark.md @@ -0,0 +1,117 @@ +# FastExcel Benchmark Guide + +This guide provides a comprehensive overview of the FastExcel benchmark module, including how to run benchmarks, interpret the results, and contribute new benchmarks. + +## Overview + +The benchmark module is designed to measure and analyze the performance of FastExcel for various Excel operations, such as reading, writing, and filling data. It uses the [Java Microbenchmark Harness (JMH)](https://openjdk.java.net/projects/code-tools/jmh/) to ensure accurate and reliable benchmark results. + +The key goals of the benchmark module are: + +- To provide a standardized way to measure the performance of FastExcel. +- To track performance regressions and improvements over time. +- To compare the performance of FastExcel with other Excel libraries, such as Apache POI. +- To help users make informed decisions about how to use FastExcel for their specific needs. + +## How to Run Benchmarks + +There are two primary ways to run the benchmarks: using the `benchmark-runner.sh` script or using Maven profiles. + +### Using the `benchmark-runner.sh` Script + +The `benchmark-runner.sh` script provides a convenient way to run the benchmarks with various options. 
+ +**Usage:** + +```bash +./fesod-benchmark/scripts/benchmark-runner.sh [OPTIONS] +``` + +**Options:** + +| Option | Description | Default | +|---|---|---| +| `-p`, `--profile` | Benchmark profile (quick, standard, comprehensive) | `standard` | +| `-o`, `--output` | Output directory for results | `benchmark-results` | +| `-j`, `--java-version` | Java version to use | `11` | +| `-m`, `--memory` | JVM heap size | `4g` | +| `-t`, `--pattern` | Benchmark pattern to match | | +| `-d`, `--dataset` | Dataset size (SMALL, MEDIUM, LARGE, EXTRA_LARGE, ALL) | `ALL` | +| `-f`, `--format` | Output format (json, csv, text) | `json` | +| `-r`, `--regression` | Enable regression analysis | | +| `-v`, `--verbose` | Enable verbose output | | +| `-h`, `--help` | Show this help message | | + +**Profiles:** + +- `quick`: Fast execution for development (2 warmup, 3 measurement, 1 fork). +- `standard`: Balanced execution for CI (3 warmup, 5 measurement, 1 fork). +- `comprehensive`: Thorough execution for nightly (5 warmup, 10 measurement, 2 forks). + +**Examples:** + +- Run standard benchmarks: + ```bash + ./fesod-benchmark/scripts/benchmark-runner.sh --profile standard + ``` +- Run quick benchmarks for read operations only: + ```bash + ./fesod-benchmark/scripts/benchmark-runner.sh --profile quick --pattern "ReadBenchmark" + ``` +- Run comprehensive benchmarks with regression analysis: + ```bash + ./fesod-benchmark/scripts/benchmark-runner.sh --profile comprehensive --regression + ``` + +### Using Maven Profiles + +You can also run the benchmarks using Maven profiles. This is useful for integrating the benchmarks into a CI/CD pipeline. + +**Usage:** + +```bash +mvn clean install -f fesod-benchmark/pom.xml -P <profile> -Dbenchmark.pattern=<pattern> +``` + +**Profiles:** + +- `benchmark`: The primary profile for running benchmarks. 
+ +**Examples:** + +- Run all benchmarks: + ```bash + mvn clean install -f fesod-benchmark/pom.xml -P benchmark + ``` +- Run a specific benchmark: + ```bash + mvn clean install -f fesod-benchmark/pom.xml -P benchmark -Dbenchmark.pattern=ReadBenchmark + ``` + +## Benchmark Suites + +The benchmark module includes the following suites: + +- **Comparison:** Benchmarks comparing FastExcel with other libraries (e.g., Apache POI). +- **Config:** Benchmarks related to configuration options. +- **Core:** Core benchmark classes and utilities. +- **Data:** Benchmarks related to data handling and processing. +- **Memory:** Benchmarks focused on memory usage. +- **Operations:** Benchmarks for specific operations like read, write, and fill. +- **Streaming:** Benchmarks for streaming operations. + +## Interpreting Results + +The benchmarks produce output in the format specified by the `--format` option. The default format is JSON. + +The output includes the following information: + +- **Benchmark:** The name of the benchmark. +- **Mode:** The benchmark mode (e.g., `thrpt` for throughput, `avgt` for average time). +- **Threads:** The number of threads used. +- **Forks:** The number of forks used. +- **Warmup Iterations:** The number of warmup iterations. +- **Measurement Iterations:** The number of measurement iterations. +- **Score:** The benchmark score. +- **Score Error:** The error of the benchmark score. +- **Unit:** The unit of the benchmark score (e.g., `ops/s` for operations per second). 
\ No newline at end of file diff --git a/fesod-benchmark/pom.xml b/fesod-benchmark/pom.xml new file mode 100644 index 000000000..c30f8de65 --- /dev/null +++ b/fesod-benchmark/pom.xml @@ -0,0 +1,195 @@ + + + + 4.0.0 + + + org.apache.fesod + fesod-parent + 1.3.0 + ../pom.xml + + + fesod-benchmark + fesod-benchmark + Comprehensive benchmark module for Fesod performance analysis + + + 1.37 + benchmarks + + + + + org.apache.fesod + fesod + ${project.version} + + + + + org.openjdk.jmh + jmh-core + ${jmh.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + provided + + + + + org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + + + + commons-io + commons-io + + + org.slf4j + slf4j-api + + + ch.qos.logback + logback-classic + + + + + com.alibaba.fastjson2 + fastjson2 + + + + + org.junit.jupiter + junit-jupiter + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + ${maven.compiler.source} + ${maven.compiler.target} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + ${uberjar.name} + + + org.openjdk.jmh.Main + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + org.codehaus.mojo + exec-maven-plugin + 3.1.0 + + + run-benchmarks + integration-test + + exec + + + test + java + -classpath %classpath org.openjdk.jmh.Main .* + + + + + + + + + + benchmark + + + + org.codehaus.mojo + exec-maven-plugin + + test + java + -classpath %classpath org.openjdk.jmh.Main ${benchmark.pattern} + + + + + + + diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/comparison/ComparisonBenchmarkRunner.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/comparison/ComparisonBenchmarkRunner.java new file mode 100644 index 000000000..67b8ac7aa --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/comparison/ComparisonBenchmarkRunner.java @@ -0,0 
+1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fesod.sheet.benchmark.comparison; + +import java.io.File; +import java.util.UUID; +import org.openjdk.jmh.results.format.ResultFormatType; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * Enhanced comparison benchmark runner with file-based result collection + * to solve JMH fork=0 static variable sharing issues + */ +public class ComparisonBenchmarkRunner { + + public static void main(String[] args) throws RunnerException { + System.out.println("Starting Enhanced FastExcel vs Apache POI Comparison Benchmark..."); + + // Generate unique session ID for this benchmark run + String sessionId = UUID.randomUUID().toString().substring(0, 8) + "_" + System.currentTimeMillis(); + String resultDirPath = "target/benchmark-results"; + File resultDir = new File(resultDirPath, sessionId); + + System.out.println("Session ID: " + sessionId); + System.out.println("Result directory: " + resultDir.getAbsolutePath()); + + // Ensure target directory exists + File targetDir = new File("target"); + if 
(!targetDir.exists()) { + targetDir.mkdirs(); + } + + // Configure benchmark options with session ID as system property + Options opt = new OptionsBuilder() + .include(FastExcelVsPoiBenchmark.class.getSimpleName()) + .param("datasetSize", "SMALL", "MEDIUM", "LARGE", "EXTRA_LARGE") + .param("fileFormat", "XLSX") + .forks(1) + .warmupIterations(2) + .measurementIterations(5) + .jvmArgs( + "-Xmx6g", + "-XX:+UseG1GC", + "-Dbenchmark.session.id=" + sessionId, + "-Dbenchmark.result.dir=" + resultDirPath) + .result("target/jmh-results-" + sessionId + ".json") + .resultFormat(ResultFormatType.JSON) + .build(); + + // Run benchmarks + System.out.println("Starting benchmark execution..."); + new Runner(opt).run(); + System.out.println("Benchmark completed successfully!"); + } +} diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/comparison/FastExcelVsPoiBenchmark.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/comparison/FastExcelVsPoiBenchmark.java new file mode 100644 index 000000000..a72798896 --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/comparison/FastExcelVsPoiBenchmark.java @@ -0,0 +1,918 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fesod.sheet.benchmark.comparison; + +import com.alibaba.fastjson2.JSONObject; +import com.alibaba.fastjson2.JSONWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.fesod.sheet.EasyExcel; +import org.apache.fesod.sheet.ExcelReader; +import org.apache.fesod.sheet.ExcelWriter; +import org.apache.fesod.sheet.benchmark.core.AbstractBenchmark; +import org.apache.fesod.sheet.benchmark.core.BenchmarkConfiguration; +import org.apache.fesod.sheet.benchmark.data.BenchmarkData; +import org.apache.fesod.sheet.benchmark.utils.BenchmarkFileUtil; +import org.apache.fesod.sheet.benchmark.utils.DataGenerator; +import org.apache.fesod.sheet.benchmark.utils.MemoryProfiler; +import org.apache.fesod.sheet.context.AnalysisContext; +import org.apache.fesod.sheet.event.AnalysisEventListener; +import org.apache.fesod.sheet.write.metadata.WriteSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.usermodel.WorkbookFactory; +import org.apache.poi.util.IOUtils; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; 
+import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Comprehensive comparison benchmarks between FastExcel (EasyExcel) and Apache POI. + * Tests performance across different operations and dataset sizes. + */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 2, time = 3, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS) +@Fork(1) +public class FastExcelVsPoiBenchmark extends AbstractBenchmark { + + // Session ID for file-based result collection (to avoid fork issues) + private static String sessionId; + private static File resultOutputDir; + + @Param({"SMALL", "MEDIUM", "LARGE", "EXTRA_LARGE"}) + private String datasetSize; + + @Param({"XLSX", "XLS"}) + private String fileFormat; + + private File testFile; + private List testDataList; + private MemoryProfiler memoryProfiler; + private List localResults = new ArrayList<>(); + + @Setup(Level.Trial) + public void setupTrial() throws Exception { + super.setupTrial(); + + // Initialize session ID and result directory (only once per trial) + if (sessionId == null) { + sessionId = System.getProperty("benchmark.session.id", String.valueOf(System.currentTimeMillis())); + + String resultDirPath = System.getProperty("benchmark.result.dir", "target/benchmark-results"); + resultOutputDir = new File(resultDirPath, sessionId); + if (!resultOutputDir.exists()) { + resultOutputDir.mkdirs(); + } + + System.out.printf("Benchmark session ID: %s%n", sessionId); + System.out.printf("Result output directory: %s%n", resultOutputDir.getAbsolutePath()); + } + + // Configure Apache POI to handle large files + // Increase the maximum byte array size limit to 1GB (default is 100MB) + IOUtils.setByteArrayMaxOverride(1024 * 1024 * 1024); // 1GB + + // Also set other relevant limits for large file 
processing + System.setProperty("poi.bytearray.max.override", "1073741824"); // 1GB + System.setProperty("poi.scratchpad.keep.oleentry", "false"); // Reduce memory usage + + // Generate test data + BenchmarkConfiguration.DatasetSize size = BenchmarkConfiguration.DatasetSize.valueOf(datasetSize); + int rowCount = size.getRowCount(); + testDataList = DataGenerator.generateTestData(size); + + BenchmarkConfiguration.FileFormat format = BenchmarkConfiguration.FileFormat.valueOf(fileFormat); + if (format == BenchmarkConfiguration.FileFormat.XLS && rowCount > 65535) { + System.out.printf( + "WARN: XLS format supports max 65536 rows, but dataset size is %d. Truncating data to 65534 rows for benchmark.%n", + rowCount); + testDataList = testDataList.subList(0, 65534); + rowCount = testDataList.size(); + } + + // Create test file + String fileName = String.format("comparison_%s.%s", datasetSize.toLowerCase(), fileFormat.toLowerCase()); + testFile = BenchmarkFileUtil.createTestFile(fileName); + + // Pre-populate test file + writeTestFile(); + + // Initialize memory profiler + memoryProfiler = new MemoryProfiler(); + + System.out.printf("Setup comparison benchmark: %s format, %d rows%n", fileFormat, rowCount); + } + + @TearDown(Level.Trial) + public void tearDownTrial() throws Exception { + if (memoryProfiler != null) { + memoryProfiler.shutdown(); + } + + if (testFile != null && testFile.exists()) { + testFile.delete(); + } + + // Write collected results to individual files for this trial + writeResultsToFiles(); + + super.tearDownTrial(); + } + + protected void setupBenchmark() throws Exception { + // Custom setup logic if needed + } + + protected void tearDownBenchmark() throws Exception { + // Custom teardown logic if needed + } + + @Setup(Level.Invocation) + public void setupInvocation() { + // Clean memory state before each benchmark + System.gc(); + try { + Thread.sleep(50); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + // 
============================================================================ + // WRITE OPERATION BENCHMARKS + // ============================================================================ + + /** + * FastExcel write benchmark + */ + @Benchmark + public ComparisonResult benchmarkFastExcelWrite(Blackhole blackhole) { + File outputFile = BenchmarkFileUtil.createTestFile(String.format( + "fastexcel_write_%s_%d.%s", + datasetSize.toLowerCase(), System.currentTimeMillis(), fileFormat.toLowerCase())); + + long startTime = System.currentTimeMillis(); + + // Start memory profiling + memoryProfiler.reset(); + memoryProfiler.start(); + long initialMemory = memoryProfiler.getUsedMemory(); + + try { + ExcelWriter excelWriter = + EasyExcel.write(outputFile, BenchmarkData.class).build(); + WriteSheet writeSheet = EasyExcel.writerSheet("TestData").build(); + + excelWriter.write(testDataList, writeSheet); + excelWriter.finish(); + + blackhole.consume(outputFile); + + } catch (Exception e) { + throw new RuntimeException("FastExcel write failed", e); + } finally { + if (outputFile.exists()) { + outputFile.delete(); + } + } + + // Stop memory profiling and get detailed stats + memoryProfiler.stop(); + MemoryProfiler.MemorySnapshot snapshot = memoryProfiler.getSnapshot(); + MemoryProfiler.MemoryStatistics stats = memoryProfiler.getDetailedStatistics(); + + long endTime = System.currentTimeMillis(); + + ComparisonResult result = new ComparisonResult( + "FastExcel", + "Write", + datasetSize, + fileFormat, + testDataList.size(), + endTime - startTime, + snapshot.getMaxUsedMemory(), + stats.getAvgMemory(), + snapshot.getMaxUsedMemory() - initialMemory, + snapshot.getAllocatedMemory(), + (int) snapshot.getGcCount(), + (int) snapshot.getGcTime(), + outputFile.length(), + stats.getMinMemory(), + stats.getStdDevMemory(), + stats.getP95Memory(), + snapshot.getMaxUsedMemory() > 0 ? 
(double) snapshot.getMaxUsedMemory() / stats.getAvgMemory() : 0.0); + + // Collect result for analysis + localResults.add(result); + + return result; + } + + /** + * Apache POI write benchmark + */ + @Benchmark + public ComparisonResult benchmarkPoiWrite(Blackhole blackhole) { + File outputFile = BenchmarkFileUtil.createTestFile(String.format( + "poi_write_%s_%d.%s", datasetSize.toLowerCase(), System.currentTimeMillis(), fileFormat.toLowerCase())); + + long startTime = System.currentTimeMillis(); + + // Start memory profiling + memoryProfiler.reset(); + memoryProfiler.start(); + long initialMemory = memoryProfiler.getUsedMemory(); + + try (FileOutputStream fos = new FileOutputStream(outputFile)) { + Workbook workbook = createWorkbook(); + Sheet sheet = workbook.createSheet("TestData"); + + // Create header row + Row headerRow = sheet.createRow(0); + String[] headers = {"ID", "String Value", "Int Value", "Double Value", "Date Value", "Boolean Value"}; + for (int i = 0; i < headers.length; i++) { + Cell cell = headerRow.createCell(i); + cell.setCellValue(headers[i]); + } + + // Write data rows + for (int i = 0; i < testDataList.size(); i++) { + BenchmarkData data = testDataList.get(i); + Row row = sheet.createRow(i + 1); + + row.createCell(0).setCellValue(data.getId() != null ? data.getId() : 0); + row.createCell(1).setCellValue(data.getStringData() != null ? data.getStringData() : ""); + row.createCell(2).setCellValue(data.getIntValue() != null ? data.getIntValue() : 0); + row.createCell(3).setCellValue(data.getDoubleValue() != null ? data.getDoubleValue() : 0.0); + if (data.getDateValue() != null) { + row.createCell(4).setCellValue(data.getDateValue()); + } + row.createCell(5).setCellValue(data.getBooleanFlag() != null ? 
data.getBooleanFlag() : false); + } + + workbook.write(fos); + workbook.close(); + + blackhole.consume(outputFile); + + } catch (Exception e) { + throw new RuntimeException("POI write failed", e); + } finally { + if (outputFile.exists()) { + outputFile.delete(); + } + } + + // Stop memory profiling and get detailed stats + memoryProfiler.stop(); + MemoryProfiler.MemorySnapshot snapshot = memoryProfiler.getSnapshot(); + MemoryProfiler.MemoryStatistics stats = memoryProfiler.getDetailedStatistics(); + + long endTime = System.currentTimeMillis(); + + ComparisonResult result = new ComparisonResult( + "Apache POI", + "Write", + datasetSize, + fileFormat, + testDataList.size(), + endTime - startTime, + snapshot.getMaxUsedMemory(), + stats.getAvgMemory(), + snapshot.getMaxUsedMemory() - initialMemory, + snapshot.getAllocatedMemory(), + (int) snapshot.getGcCount(), + (int) snapshot.getGcTime(), + outputFile.length(), + stats.getMinMemory(), + stats.getStdDevMemory(), + stats.getP95Memory(), + snapshot.getMaxUsedMemory() > 0 ? 
(double) snapshot.getMaxUsedMemory() / stats.getAvgMemory() : 0.0); + + // Collect result for analysis + localResults.add(result); + + return result; + } + + // ============================================================================ + // READ OPERATION BENCHMARKS + // ============================================================================ + + /** + * FastExcel read benchmark + */ + @Benchmark + public ComparisonResult benchmarkFastExcelRead(Blackhole blackhole) { + // First, create a test file with FastExcel + // createTestFileWithFastExcel(); + + long startTime = System.currentTimeMillis(); + + // Start memory profiling + memoryProfiler.reset(); + memoryProfiler.start(); + long initialMemory = memoryProfiler.getUsedMemory(); + + AtomicLong processedRows = new AtomicLong(0); + + try { + ExcelReader excelReader = EasyExcel.read( + testFile, BenchmarkData.class, new AnalysisEventListener() { + @Override + public void invoke(BenchmarkData data, AnalysisContext context) { + processedRows.incrementAndGet(); + blackhole.consume(data); + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + // Processing complete + } + }) + .build(); + + excelReader.readAll(); + excelReader.finish(); + + } catch (Exception e) { + throw new RuntimeException("FastExcel read failed", e); + } + + // Stop memory profiling and get detailed stats + memoryProfiler.stop(); + MemoryProfiler.MemorySnapshot snapshot = memoryProfiler.getSnapshot(); + MemoryProfiler.MemoryStatistics stats = memoryProfiler.getDetailedStatistics(); + + long endTime = System.currentTimeMillis(); + + ComparisonResult result = new ComparisonResult( + "FastExcel", + "Read", + datasetSize, + fileFormat, + processedRows.get(), + endTime - startTime, + snapshot.getMaxUsedMemory(), + stats.getAvgMemory(), + snapshot.getMaxUsedMemory() - initialMemory, + snapshot.getAllocatedMemory(), + (int) snapshot.getGcCount(), + (int) snapshot.getGcTime(), + testFile.length(), + stats.getMinMemory(), + 
stats.getStdDevMemory(), + stats.getP95Memory(), + snapshot.getMaxUsedMemory() > 0 ? (double) snapshot.getMaxUsedMemory() / stats.getAvgMemory() : 0.0); + + // Collect result for analysis + localResults.add(result); + + return result; + } + + /** + * Apache POI read benchmark + */ + @Benchmark + public ComparisonResult benchmarkPoiRead(Blackhole blackhole) { + // First, create a test file with POI + // createTestFileWithPoi(); + + long startTime = System.currentTimeMillis(); + + // Start memory profiling + memoryProfiler.reset(); + memoryProfiler.start(); + long initialMemory = memoryProfiler.getUsedMemory(); + + long processedRows = 0; + + try (FileInputStream fis = new FileInputStream(testFile)) { + Workbook workbook = createWorkbook(); + workbook = WorkbookFactory.create(fis); + Sheet sheet = workbook.getSheetAt(0); + + for (Row row : sheet) { + if (row.getRowNum() == 0) continue; // Skip header + + // Read all cells + for (Cell cell : row) { + blackhole.consume(cell.toString()); + } + + processedRows++; + } + + workbook.close(); + + } catch (Exception e) { + throw new RuntimeException("POI read failed", e); + } + + // Stop memory profiling and get detailed stats + memoryProfiler.stop(); + MemoryProfiler.MemorySnapshot snapshot = memoryProfiler.getSnapshot(); + MemoryProfiler.MemoryStatistics stats = memoryProfiler.getDetailedStatistics(); + + long endTime = System.currentTimeMillis(); + + ComparisonResult result = new ComparisonResult( + "Apache POI", + "Read", + datasetSize, + fileFormat, + processedRows, + endTime - startTime, + snapshot.getMaxUsedMemory(), + stats.getAvgMemory(), + snapshot.getMaxUsedMemory() - initialMemory, + snapshot.getAllocatedMemory(), + (int) snapshot.getGcCount(), + (int) snapshot.getGcTime(), + testFile.length(), + stats.getMinMemory(), + stats.getStdDevMemory(), + stats.getP95Memory(), + snapshot.getMaxUsedMemory() > 0 ? 
(double) snapshot.getMaxUsedMemory() / stats.getAvgMemory() : 0.0); + + // Collect result for analysis + localResults.add(result); + + return result; + } + + // ============================================================================ + // STREAMING OPERATION BENCHMARKS + // ============================================================================ + + /** + * FastExcel streaming read benchmark + */ + @Benchmark + public ComparisonResult benchmarkFastExcelStreamingRead(Blackhole blackhole) { + long startTime = System.currentTimeMillis(); + + // Start memory profiling + memoryProfiler.reset(); + memoryProfiler.start(); + long initialMemory = memoryProfiler.getUsedMemory(); + + AtomicLong processedRows = new AtomicLong(0); + List batch = new ArrayList<>(); + int batchSize = 1000; + + try { + ExcelReader excelReader = EasyExcel.read( + testFile, BenchmarkData.class, new AnalysisEventListener() { + @Override + public void invoke(BenchmarkData data, AnalysisContext context) { + batch.add(data); + processedRows.incrementAndGet(); + + if (batch.size() >= batchSize) { + blackhole.consume(new ArrayList<>(batch)); + batch.clear(); + } + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + if (!batch.isEmpty()) { + blackhole.consume(batch); + batch.clear(); + } + } + }) + .build(); + + excelReader.readAll(); + excelReader.finish(); + + } catch (Exception e) { + throw new RuntimeException("FastExcel streaming read failed", e); + } + + // Stop memory profiling and get detailed stats + memoryProfiler.stop(); + MemoryProfiler.MemorySnapshot snapshot = memoryProfiler.getSnapshot(); + MemoryProfiler.MemoryStatistics stats = memoryProfiler.getDetailedStatistics(); + + long endTime = System.currentTimeMillis(); + + ComparisonResult result = new ComparisonResult( + "FastExcel", + "StreamingRead", + datasetSize, + fileFormat, + processedRows.get(), + endTime - startTime, + snapshot.getMaxUsedMemory(), + stats.getAvgMemory(), + snapshot.getMaxUsedMemory() - 
initialMemory, + snapshot.getAllocatedMemory(), + (int) snapshot.getGcCount(), + (int) snapshot.getGcTime(), + testFile.length(), + stats.getMinMemory(), + stats.getStdDevMemory(), + stats.getP95Memory(), + snapshot.getMaxUsedMemory() > 0 ? (double) snapshot.getMaxUsedMemory() / stats.getAvgMemory() : 0.0); + + // Collect result for analysis + localResults.add(result); + + return result; + } + + /** + * Apache POI streaming read benchmark (using XSSF streaming) + */ + @Benchmark + public ComparisonResult benchmarkPoiStreamingRead(Blackhole blackhole) { + long startTime = System.currentTimeMillis(); + + // Start memory profiling + memoryProfiler.reset(); + memoryProfiler.start(); + long initialMemory = memoryProfiler.getUsedMemory(); + + long processedRows = 0; + + try (FileInputStream fis = new FileInputStream(testFile)) { + BenchmarkConfiguration.FileFormat format = BenchmarkConfiguration.FileFormat.valueOf(fileFormat); + if (format == BenchmarkConfiguration.FileFormat.XLSX) { + // Use XSSF streaming for XLSX files + org.apache.poi.xssf.streaming.SXSSFWorkbook streamingWorkbook = + new org.apache.poi.xssf.streaming.SXSSFWorkbook(); + + // Read with streaming approach + Workbook workbook = WorkbookFactory.create(fis); + Sheet sheet = workbook.getSheetAt(0); + + for (Row row : sheet) { + if (row.getRowNum() == 0) continue; // Skip header + + for (Cell cell : row) { + blackhole.consume(cell.toString()); + } + + processedRows++; + + // Simulate streaming by processing in batches + if (processedRows % 1000 == 0) { + System.gc(); // Force memory cleanup + } + } + + workbook.close(); + streamingWorkbook.close(); + } else { + // Regular POI for XLS files (no streaming support) + Workbook workbook = WorkbookFactory.create(fis); + Sheet sheet = workbook.getSheetAt(0); + + for (Row row : sheet) { + if (row.getRowNum() == 0) continue; // Skip header + + for (Cell cell : row) { + blackhole.consume(cell.toString()); + } + + processedRows++; + } + + workbook.close(); + } + + } 
catch (Exception e) { + throw new RuntimeException("POI streaming read failed", e); + } + + // Stop memory profiling and get detailed stats + memoryProfiler.stop(); + MemoryProfiler.MemorySnapshot snapshot = memoryProfiler.getSnapshot(); + MemoryProfiler.MemoryStatistics stats = memoryProfiler.getDetailedStatistics(); + + long endTime = System.currentTimeMillis(); + + ComparisonResult result = new ComparisonResult( + "Apache POI", + "StreamingRead", + datasetSize, + fileFormat, + processedRows, + endTime - startTime, + snapshot.getMaxUsedMemory(), + stats.getAvgMemory(), + snapshot.getMaxUsedMemory() - initialMemory, + snapshot.getAllocatedMemory(), + (int) snapshot.getGcCount(), + (int) snapshot.getGcTime(), + testFile.length(), + stats.getMinMemory(), + stats.getStdDevMemory(), + stats.getP95Memory(), + snapshot.getMaxUsedMemory() > 0 ? (double) snapshot.getMaxUsedMemory() / stats.getAvgMemory() : 0.0); + + // Collect result for analysis + localResults.add(result); + + return result; + } + + // ============================================================================ + // UTILITY METHODS + // ============================================================================ + + /** + * Write local results to individual files for cross-fork communication + */ + private void writeResultsToFiles() { + if (resultOutputDir == null || localResults.isEmpty()) { + return; + } + + try { + for (int i = 0; i < localResults.size(); i++) { + ComparisonResult result = localResults.get(i); + String fileName = String.format( + "result_%s_%s_%s_%s_%d.json", + result.library.replace(" ", "_"), result.operation, datasetSize, fileFormat, i); + File resultFile = new File(resultOutputDir, fileName); + + // Write result as JSON + writeResultAsJson(result, resultFile); + } + + System.out.printf("Wrote %d results to %s%n", localResults.size(), resultOutputDir.getAbsolutePath()); + } catch (Exception e) { + System.err.println("Error writing results to files: " + e.getMessage()); + 
e.printStackTrace(); + } + } + + /** + * Write a single result as JSON to file + */ + private void writeResultAsJson(ComparisonResult result, File file) throws Exception { + // Use fastjson2 for writing JSON instead of manual string building + JSONObject jsonObject = new JSONObject(); + jsonObject.put("library", result.library); + jsonObject.put("operation", result.operation); + jsonObject.put("datasetSize", result.datasetSize); + jsonObject.put("fileFormat", result.fileFormat); + jsonObject.put("processedRows", result.processedRows); + jsonObject.put("executionTimeMs", result.executionTimeMs); + jsonObject.put("memoryUsageBytes", result.memoryUsageBytes); + jsonObject.put("peakMemoryUsageBytes", result.peakMemoryUsageBytes); + jsonObject.put("avgMemoryUsageBytes", result.avgMemoryUsageBytes); + jsonObject.put("memoryAllocatedBytes", result.memoryAllocatedBytes); + jsonObject.put("gcCount", result.gcCount); + jsonObject.put("gcTimeMs", result.gcTimeMs); + jsonObject.put("fileSizeBytes", result.fileSizeBytes); + jsonObject.put("minMemoryUsageBytes", result.minMemoryUsageBytes); + jsonObject.put("stdDevMemoryUsageBytes", result.stdDevMemoryUsageBytes); + jsonObject.put("p95MemoryUsageBytes", result.p95MemoryUsageBytes); + jsonObject.put("memoryGrowthRate", result.memoryGrowthRate); + jsonObject.put("throughputRowsPerSecond", result.getThroughputRowsPerSecond()); + jsonObject.put("memoryEfficiencyRatio", result.getMemoryEfficiencyRatio()); + jsonObject.put("throughputMBPerSecond", result.getThroughputMBPerSecond()); + + try (java.io.FileWriter writer = new java.io.FileWriter(file)) { + writer.write(jsonObject.toJSONString(JSONWriter.Feature.PrettyFormat)); + } + } + + /** + * Create appropriate workbook based on file format + */ + private Workbook createWorkbook() { + return "XLSX".equals(fileFormat) ? 
new XSSFWorkbook() : new HSSFWorkbook(); + } + + /** + * Write test data to file for read benchmarks + */ + private void writeTestFile() { + try { + EasyExcel.write(testFile, BenchmarkData.class).sheet("TestData").doWrite(testDataList); + } catch (Exception e) { + throw new RuntimeException("Failed to write test file", e); + } + } + + /** + * Result class for comparison benchmarks + */ + public static class ComparisonResult { + public final String library; + public final String operation; + public final String datasetSize; + public final String fileFormat; + public final long processedRows; + public final long executionTimeMs; + public final long memoryUsageBytes; + public final long peakMemoryUsageBytes; + public final long avgMemoryUsageBytes; + public final long memoryAllocatedBytes; + public final long gcCount; + public final long gcTimeMs; + public final long fileSizeBytes; + public final long minMemoryUsageBytes; + public final long stdDevMemoryUsageBytes; + public final long p95MemoryUsageBytes; + public final double memoryGrowthRate; + + public ComparisonResult( + String library, + String operation, + String datasetSize, + String fileFormat, + long processedRows, + long executionTimeMs, + long memoryUsageBytes, + long fileSizeBytes) { + this.library = library; + this.operation = operation; + this.datasetSize = datasetSize; + this.fileFormat = fileFormat; + this.processedRows = processedRows; + this.executionTimeMs = executionTimeMs; + this.memoryUsageBytes = memoryUsageBytes; + this.peakMemoryUsageBytes = memoryUsageBytes; + this.avgMemoryUsageBytes = memoryUsageBytes; + this.memoryAllocatedBytes = 0; + this.gcCount = 0; + this.gcTimeMs = 0; + this.fileSizeBytes = fileSizeBytes; + this.minMemoryUsageBytes = memoryUsageBytes; + this.stdDevMemoryUsageBytes = 0; + this.p95MemoryUsageBytes = memoryUsageBytes; + this.memoryGrowthRate = 0.0; + } + + public ComparisonResult( + String library, + String operation, + String datasetSize, + String fileFormat, + long 
processedRows, + long executionTimeMs, + long peakMemoryUsageBytes, + long avgMemoryUsageBytes, + long memoryUsageBytes, + long memoryAllocatedBytes, + long gcCount, + long gcTimeMs, + long fileSizeBytes) { + this.library = library; + this.operation = operation; + this.datasetSize = datasetSize; + this.fileFormat = fileFormat; + this.processedRows = processedRows; + this.executionTimeMs = executionTimeMs; + this.peakMemoryUsageBytes = peakMemoryUsageBytes; + this.avgMemoryUsageBytes = avgMemoryUsageBytes; + this.memoryUsageBytes = memoryUsageBytes; + this.memoryAllocatedBytes = memoryAllocatedBytes; + this.gcCount = gcCount; + this.gcTimeMs = gcTimeMs; + this.fileSizeBytes = fileSizeBytes; + this.minMemoryUsageBytes = avgMemoryUsageBytes; + this.stdDevMemoryUsageBytes = 0; + this.p95MemoryUsageBytes = peakMemoryUsageBytes; + this.memoryGrowthRate = memoryUsageBytes > 0 ? (double) peakMemoryUsageBytes / memoryUsageBytes : 0.0; + } + + public ComparisonResult( + String library, + String operation, + String datasetSize, + String fileFormat, + long processedRows, + long executionTimeMs, + long peakMemoryUsageBytes, + long avgMemoryUsageBytes, + long memoryUsageBytes, + long memoryAllocatedBytes, + long gcCount, + long gcTimeMs, + long fileSizeBytes, + long minMemoryUsageBytes, + long stdDevMemoryUsageBytes, + long p95MemoryUsageBytes, + double memoryGrowthRate) { + this.library = library; + this.operation = operation; + this.datasetSize = datasetSize; + this.fileFormat = fileFormat; + this.processedRows = processedRows; + this.executionTimeMs = executionTimeMs; + this.peakMemoryUsageBytes = peakMemoryUsageBytes; + this.avgMemoryUsageBytes = avgMemoryUsageBytes; + this.memoryUsageBytes = memoryUsageBytes; + this.memoryAllocatedBytes = memoryAllocatedBytes; + this.gcCount = gcCount; + this.gcTimeMs = gcTimeMs; + this.fileSizeBytes = fileSizeBytes; + this.minMemoryUsageBytes = minMemoryUsageBytes; + this.stdDevMemoryUsageBytes = stdDevMemoryUsageBytes; + 
this.p95MemoryUsageBytes = p95MemoryUsageBytes; + this.memoryGrowthRate = memoryGrowthRate; + } + + public double getThroughputRowsPerSecond() { + return executionTimeMs > 0 ? (processedRows * 1000.0) / executionTimeMs : 0.0; + } + + public double getMemoryEfficiencyRatio() { + return memoryUsageBytes > 0 ? (double) processedRows / memoryUsageBytes : 0.0; + } + + public double getThroughputMBPerSecond() { + return executionTimeMs > 0 ? (fileSizeBytes / (1024.0 * 1024.0)) / (executionTimeMs / 1000.0) : 0.0; + } + + public double getPeakMemoryUsageMB() { + return peakMemoryUsageBytes / (1024.0 * 1024.0); + } + + public double getAvgMemoryUsageMB() { + return avgMemoryUsageBytes / (1024.0 * 1024.0); + } + + public double getMinMemoryUsageMB() { + return minMemoryUsageBytes / (1024.0 * 1024.0); + } + + public double getStdDevMemoryUsageMB() { + return stdDevMemoryUsageBytes / (1024.0 * 1024.0); + } + + public double getP95MemoryUsageMB() { + return p95MemoryUsageBytes / (1024.0 * 1024.0); + } + + @Override + public String toString() { + return String.format( + "%s-%s{rows=%d, time=%dms, peakMem=%.2f MB, avgMem=%.2f MB, minMem=%.2f MB, stdDev=%.2f MB, p95=%.2f MB, allocMem=%d bytes, gcCount=%d, gcTime=%dms, throughput=%.2f rows/s, efficiency=%.2e, growthRate=%.2f}", + library, + operation, + processedRows, + executionTimeMs, + getPeakMemoryUsageMB(), + getAvgMemoryUsageMB(), + getMinMemoryUsageMB(), + getStdDevMemoryUsageMB(), + getP95MemoryUsageMB(), + memoryAllocatedBytes, + gcCount, + gcTimeMs, + getThroughputRowsPerSecond(), + getMemoryEfficiencyRatio(), + memoryGrowthRate); + } + } +} diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/core/AbstractBenchmark.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/core/AbstractBenchmark.java new file mode 100644 index 000000000..f7ca83724 --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/core/AbstractBenchmark.java @@ -0,0 +1,234 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fesod.sheet.benchmark.core; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.concurrent.TimeUnit; +import org.apache.fesod.sheet.benchmark.utils.MemoryProfiler; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Abstract base class for all benchmarks providing common functionality + */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@Warmup(iterations = BenchmarkConfiguration.DEFAULT_WARMUP_ITERATIONS, time = 1) +@Measurement(iterations = BenchmarkConfiguration.DEFAULT_MEASUREMENT_ITERATIONS, time = 1) +@Fork(BenchmarkConfiguration.DEFAULT_FORK_COUNT) 
+public abstract class AbstractBenchmark { + + protected static final Logger logger = LoggerFactory.getLogger(AbstractBenchmark.class); + + protected MemoryProfiler memoryProfiler; + protected String outputDirectory; + protected String benchmarkName; + + @Setup(Level.Trial) + public void setupTrial() throws Exception { + benchmarkName = this.getClass().getSimpleName(); + outputDirectory = BenchmarkConfiguration.DEFAULT_OUTPUT_DIR + File.separator + benchmarkName; + + // Create output directories + createDirectories(); + + // Initialize memory profiler if enabled + if (BenchmarkConfiguration.ENABLE_MEMORY_PROFILING) { + memoryProfiler = new MemoryProfiler(); + } + + logger.info("Setting up benchmark: {}", benchmarkName); + setupBenchmark(); + } + + @TearDown(Level.Trial) + public void tearDownTrial() throws Exception { + logger.info("Tearing down benchmark: {}", benchmarkName); + tearDownBenchmark(); + + if (memoryProfiler != null) { + memoryProfiler.stop(); + } + } + + @Setup(Level.Iteration) + public void setupIteration() throws Exception { + if (memoryProfiler != null) { + try { + memoryProfiler.reset(); + memoryProfiler.start(); + } catch (Exception e) { + logger.warn("Failed to start memory profiler: {}", e.getMessage()); + // Continue without memory profiling + } + } + setupIteration0(); + } + + @TearDown(Level.Iteration) + public void tearDownIteration() throws Exception { + tearDownIteration0(); + + if (memoryProfiler != null) { + try { + memoryProfiler.stop(); + logMemoryUsage(); + } catch (Exception e) { + logger.warn("Failed to stop memory profiler: {}", e.getMessage()); + // Continue without memory profiling + } + } + } + + /** + * Template method for benchmark-specific setup + */ + protected abstract void setupBenchmark() throws Exception; + + /** + * Template method for benchmark-specific teardown + */ + protected abstract void tearDownBenchmark() throws Exception; + + /** + * Template method for iteration-specific setup + */ + protected void 
setupIteration0() throws Exception { + // Default implementation does nothing + } + + /** + * Template method for iteration-specific teardown + */ + protected void tearDownIteration0() throws Exception { + // Default implementation does nothing + } + + /** + * Create necessary output directories + */ + private void createDirectories() throws IOException { + Path outputPath = Paths.get(outputDirectory); + if (!Files.exists(outputPath)) { + Files.createDirectories(outputPath); + } + } + + /** + * Log memory usage information + */ + private void logMemoryUsage() { + if (memoryProfiler != null) { + MemoryProfiler.MemorySnapshot snapshot = memoryProfiler.getSnapshot(); + logger.debug( + "Memory usage - Max: {} MB, Avg: {} MB, Allocated: {} MB, GC Count: {}, GC Time: {} ms", + snapshot.getMaxUsedMemoryMB(), + snapshot.getAvgUsedMemoryMB(), + snapshot.getAllocatedMemoryMB(), + snapshot.getGcCount(), + snapshot.getGcTime()); + System.out.println("Memory usage - Max: " + + snapshot.getMaxUsedMemoryMB() + + " MB, Avg: " + + snapshot.getAvgUsedMemoryMB() + + " MB, Allocated: " + + snapshot.getAllocatedMemoryMB() + + " MB, GC Count: " + + snapshot.getGcCount() + + ", GC Time: " + + snapshot.getGcTime() + + " ms"); + } + } + + /** + * Get a temporary file path for the given format and size + */ + protected String getTempFilePath( + BenchmarkConfiguration.FileFormat format, BenchmarkConfiguration.DatasetSize size) { + return outputDirectory + File.separator + "temp_" + size.getLabel() + "." 
+ format.getExtension(); + } + + /** + * Clean up temporary files + */ + protected void cleanupTempFiles() { + try { + Path outputPath = Paths.get(outputDirectory); + if (Files.exists(outputPath)) { + Files.walk(outputPath) + .filter(path -> path.getFileName().toString().startsWith("temp_")) + .forEach(path -> { + try { + Files.deleteIfExists(path); + } catch (IOException e) { + logger.warn("Failed to delete temp file: {}", path, e); + } + }); + } + } catch (IOException e) { + logger.warn("Failed to cleanup temp files", e); + } + } + + /** + * Force garbage collection and wait for it to complete + */ + protected void forceGC() { + System.gc(); + System.runFinalization(); + try { + Thread.sleep(100); // Give GC time to complete + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + /** + * Consume data to prevent JVM optimizations + */ + protected void consumeData(Object data, Blackhole blackhole) { + if (blackhole != null) { + blackhole.consume(data); + } + } + + /** + * Get current memory usage in bytes + */ + protected long getCurrentMemoryUsage() { + Runtime runtime = Runtime.getRuntime(); + return runtime.totalMemory() - runtime.freeMemory(); + } +} diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/core/BenchmarkConfiguration.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/core/BenchmarkConfiguration.java new file mode 100644 index 000000000..66106498c --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/core/BenchmarkConfiguration.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fesod.sheet.benchmark.core; + +
/**
 * Central place for the constants and enumerations that parameterize the benchmarks:
 * default iteration/fork counts, output locations, memory-monitoring switches, and the
 * dataset sizes, file formats and operation types a benchmark can be run against.
 */
public class BenchmarkConfiguration {

    // ---- Default benchmark configuration ----

    /** Default number of warmup iterations. */
    public static final int DEFAULT_WARMUP_ITERATIONS = 3;

    /** Default number of measurement iterations. */
    public static final int DEFAULT_MEASUREMENT_ITERATIONS = 5;

    /** Default number of forks. */
    public static final int DEFAULT_FORK_COUNT = 1;

    /** Default directory for benchmark output. */
    public static final String DEFAULT_OUTPUT_DIR = "target/benchmark-results";

    /** Default directory for baseline files. */
    public static final String DEFAULT_BASELINE_DIR = "src/test/resources/baselines";

    // ---- Memory monitoring configuration ----

    /** Whether memory profiling is enabled. */
    public static final boolean ENABLE_MEMORY_PROFILING = true;

    /** Memory sampling interval in milliseconds. */
    public static final long MEMORY_SAMPLING_INTERVAL_MS = 100;

    /**
     * Dataset sizes for benchmarks; each pairs a row count with a short display label.
     */
    public enum DatasetSize {
        SMALL(1_000, "1K"),
        MEDIUM(10_000, "10K"),
        LARGE(100_000, "100K"),
        EXTRA_LARGE(1_000_000, "1M");

        private final int rowCount;
        private final String label;

        DatasetSize(int rowCount, String label) {
            this.rowCount = rowCount;
            this.label = label;
        }

        /** @return the number of rows in a dataset of this size */
        public int getRowCount() {
            return this.rowCount;
        }

        /** @return the short label for this size, e.g. {@code "1K"} */
        public String getLabel() {
            return this.label;
        }
    }

    /**
     * File formats supported for benchmarking; each carries its file extension without a dot.
     */
    public enum FileFormat {
        XLSX("xlsx"),
        XLS("xls"),
        CSV("csv");

        private final String extension;

        FileFormat(String extension) {
            this.extension = extension;
        }

        /** @return the file extension for this format, without a leading dot */
        public String getExtension() {
            return this.extension;
        }
    }

    /**
     * Benchmark operation types
     */
    public enum OperationType {
        READ,
        WRITE,
        FILL,
        CONVERT
    }
}
+diff --git 
a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/data/BenchmarkData.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/data/BenchmarkData.java new file mode 100644 index 000000000..95800b8fc --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/data/BenchmarkData.java @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.fesod.sheet.benchmark.data; + +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.LocalDateTime; +import org.apache.fesod.sheet.annotation.ExcelProperty; +import org.apache.fesod.sheet.annotation.format.DateTimeFormat; + +/** + * Standard benchmark data model with various data types for comprehensive testing + */ +public class BenchmarkData { + + @ExcelProperty(value = "ID", index = 0) + private Long id; + + @ExcelProperty(value = "String Data", index = 1) + private String stringData; + + @ExcelProperty(value = "Integer Value", index = 2) + private Integer intValue; + + @ExcelProperty(value = "Long Value", index = 3) + private Long longValue; + + @ExcelProperty(value = "Double Value", index = 4) + private Double doubleValue; + + @ExcelProperty(value = "BigDecimal Value", index = 5) + private BigDecimal bigDecimalValue; + + @ExcelProperty(value = "Boolean Flag", index = 6) + private Boolean booleanFlag; + + @ExcelProperty(value = "Date Value", index = 7) + @DateTimeFormat("yyyy-MM-dd") + private LocalDate dateValue; + + @ExcelProperty(value = "DateTime Value", index = 8) + @DateTimeFormat("yyyy-MM-dd HH:mm:ss") + private LocalDateTime dateTimeValue; + + @ExcelProperty(value = "Category", index = 9) + private String category; + + @ExcelProperty(value = "Description", index = 10) + private String description; + + @ExcelProperty(value = "Status", index = 11) + private String status; + + @ExcelProperty(value = "Float Value", index = 12) + private Float floatValue; + + @ExcelProperty(value = "Short Value", index = 13) + private Short shortValue; + + @ExcelProperty(value = "Byte Value", index = 14) + private Byte byteValue; + + @ExcelProperty(value = "Extra Data 1", index = 15) + private String extraData1; + + @ExcelProperty(value = "Extra Data 2", index = 16) + private String extraData2; + + @ExcelProperty(value = "Extra Data 3", index = 17) + private String extraData3; + + @ExcelProperty(value = "Extra Data 4", index = 
18) + private String extraData4; + + @ExcelProperty(value = "Extra Data 5", index = 19) + private String extraData5; + + // Default constructor + public BenchmarkData() {} + + // Full constructor + public BenchmarkData( + Long id, + String stringData, + Integer intValue, + Long longValue, + Double doubleValue, + BigDecimal bigDecimalValue, + Boolean booleanFlag, + LocalDate dateValue, + LocalDateTime dateTimeValue, + String category, + String description, + String status, + Float floatValue, + Short shortValue, + Byte byteValue, + String extraData1, + String extraData2, + String extraData3, + String extraData4, + String extraData5) { + this.id = id; + this.stringData = stringData; + this.intValue = intValue; + this.longValue = longValue; + this.doubleValue = doubleValue; + this.bigDecimalValue = bigDecimalValue; + this.booleanFlag = booleanFlag; + this.dateValue = dateValue; + this.dateTimeValue = dateTimeValue; + this.category = category; + this.description = description; + this.status = status; + this.floatValue = floatValue; + this.shortValue = shortValue; + this.byteValue = byteValue; + this.extraData1 = extraData1; + this.extraData2 = extraData2; + this.extraData3 = extraData3; + this.extraData4 = extraData4; + this.extraData5 = extraData5; + } + + // Getters and setters + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public String getStringData() { + return stringData; + } + + public void setStringData(String stringData) { + this.stringData = stringData; + } + + public Integer getIntValue() { + return intValue; + } + + public void setIntValue(Integer intValue) { + this.intValue = intValue; + } + + public Long getLongValue() { + return longValue; + } + + public void setLongValue(Long longValue) { + this.longValue = longValue; + } + + public Double getDoubleValue() { + return doubleValue; + } + + public void setDoubleValue(Double doubleValue) { + this.doubleValue = doubleValue; + } + + public BigDecimal 
getBigDecimalValue() { + return bigDecimalValue; + } + + public void setBigDecimalValue(BigDecimal bigDecimalValue) { + this.bigDecimalValue = bigDecimalValue; + } + + public Boolean getBooleanFlag() { + return booleanFlag; + } + + public void setBooleanFlag(Boolean booleanFlag) { + this.booleanFlag = booleanFlag; + } + + public LocalDate getDateValue() { + return dateValue; + } + + public void setDateValue(LocalDate dateValue) { + this.dateValue = dateValue; + } + + public LocalDateTime getDateTimeValue() { + return dateTimeValue; + } + + public void setDateTimeValue(LocalDateTime dateTimeValue) { + this.dateTimeValue = dateTimeValue; + } + + public String getCategory() { + return category; + } + + public void setCategory(String category) { + this.category = category; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public Float getFloatValue() { + return floatValue; + } + + public void setFloatValue(Float floatValue) { + this.floatValue = floatValue; + } + + public Short getShortValue() { + return shortValue; + } + + public void setShortValue(Short shortValue) { + this.shortValue = shortValue; + } + + public Byte getByteValue() { + return byteValue; + } + + public void setByteValue(Byte byteValue) { + this.byteValue = byteValue; + } + + public String getExtraData1() { + return extraData1; + } + + public void setExtraData1(String extraData1) { + this.extraData1 = extraData1; + } + + public String getExtraData2() { + return extraData2; + } + + public void setExtraData2(String extraData2) { + this.extraData2 = extraData2; + } + + public String getExtraData3() { + return extraData3; + } + + public void setExtraData3(String extraData3) { + this.extraData3 = extraData3; + } + + public String getExtraData4() { + return extraData4; + } + 
+ public void setExtraData4(String extraData4) { + this.extraData4 = extraData4; + } + + public String getExtraData5() { + return extraData5; + } + + public void setExtraData5(String extraData5) { + this.extraData5 = extraData5; + } + + @Override + public String toString() { + return "BenchmarkData{" + "id=" + + id + ", stringData='" + + stringData + '\'' + ", intValue=" + + intValue + ", longValue=" + + longValue + ", doubleValue=" + + doubleValue + ", bigDecimalValue=" + + bigDecimalValue + ", booleanFlag=" + + booleanFlag + ", dateValue=" + + dateValue + ", dateTimeValue=" + + dateTimeValue + ", category='" + + category + '\'' + ", description='" + + description + '\'' + ", status='" + + status + '\'' + ", floatValue=" + + floatValue + ", shortValue=" + + shortValue + ", byteValue=" + + byteValue + ", extraData1='" + + extraData1 + '\'' + ", extraData2='" + + extraData2 + '\'' + ", extraData3='" + + extraData3 + '\'' + ", extraData4='" + + extraData4 + '\'' + ", extraData5='" + + extraData5 + '\'' + '}'; + } +} diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/FillBenchmark.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/FillBenchmark.java new file mode 100644 index 000000000..e89ac30c0 --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/FillBenchmark.java @@ -0,0 +1,613 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fesod.sheet.benchmark.operations; + +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.fesod.sheet.EasyExcel; +import org.apache.fesod.sheet.ExcelWriter; +import org.apache.fesod.sheet.benchmark.core.AbstractBenchmark; +import org.apache.fesod.sheet.benchmark.core.BenchmarkConfiguration; +import org.apache.fesod.sheet.benchmark.data.BenchmarkData; +import org.apache.fesod.sheet.benchmark.utils.BenchmarkFileUtil; +import org.apache.fesod.sheet.benchmark.utils.DataGenerator; +import org.apache.fesod.sheet.enums.WriteDirectionEnum; +import org.apache.fesod.sheet.write.metadata.WriteSheet; +import org.apache.fesod.sheet.write.metadata.fill.FillConfig; +import org.apache.fesod.sheet.write.metadata.fill.FillWrapper; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Comprehensive benchmarks for FastExcel fill operations + */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@State(Scope.Benchmark) 
+@Warmup(iterations = 3, time = 2) +@Measurement(iterations = 5, time = 3) +@Fork(1) +public class FillBenchmark extends AbstractBenchmark { + + // Template files for different scenarios + private String simpleTemplateFile; + private String complexTemplateFile; + private String horizontalTemplateFile; + private String verticalTemplateFile; + private String multiListTemplateFile; + + // Test data for different sizes + private List smallData; + private List mediumData; + private List largeData; + + // Single objects for simple fills + private BenchmarkData singleData; + private Map simpleMap; + private Map complexMap; + + // Fill configurations + private FillConfig simpleFillConfig; + private FillConfig complexFillConfig; + private FillConfig horizontalFillConfig; + private FillConfig verticalConfig; + private FillConfig horizontalConfig; + private FillConfig forceNewRowConfig; + + // Data generator + private DataGenerator dataGenerator; + + @Override + protected void setupBenchmark() throws Exception { + logger.info("Setting up fill benchmark templates and data..."); + + dataGenerator = new DataGenerator(); + + // Generate test data + generateTestData(); + + // Create template files + createTemplateFiles(); + + // Setup fill configurations + setupFillConfigurations(); + + logger.info("Fill benchmark setup completed"); + } + + @Override + protected void tearDownBenchmark() throws Exception { + // Clean up temporary files + BenchmarkFileUtil.cleanupTempFiles(); + logger.info("Fill benchmark cleanup completed"); + } + + private void generateTestData() { + // Generate data for different sizes + smallData = dataGenerator.generateData(BenchmarkConfiguration.DatasetSize.SMALL); + mediumData = dataGenerator.generateData(BenchmarkConfiguration.DatasetSize.MEDIUM); + largeData = dataGenerator.generateData(BenchmarkConfiguration.DatasetSize.LARGE); + + // Single object for simple fills + singleData = smallData.get(0); + + // Simple map for template variable filling + simpleMap 
= new HashMap<>(); + simpleMap.put("title", "Benchmark Report"); + simpleMap.put("date", LocalDate.now().toString()); + simpleMap.put("dateTime", LocalDateTime.now().toString()); + simpleMap.put("total", 12345.67); + simpleMap.put("count", 1000); + simpleMap.put("author", "FastExcel Benchmark"); + + // Complex map with nested data + complexMap = new HashMap<>(); + complexMap.put("reportTitle", "Performance Analysis Report"); + complexMap.put("generatedDate", LocalDate.now()); + complexMap.put("generatedTime", LocalDateTime.now()); + complexMap.put("totalRecords", largeData.size()); + complexMap.put("avgProcessingTime", 123.45); + complexMap.put("maxMemoryUsage", "256MB"); + complexMap.put( + "summary", "This is a comprehensive performance analysis report generated by FastExcel benchmarks."); + + logger.debug( + "Generated test data - Small: {}, Medium: {}, Large: {} rows", + smallData.size(), + mediumData.size(), + largeData.size()); + } + + private void createTemplateFiles() { + // Create simple template with basic placeholders + simpleTemplateFile = createSimpleTemplate(); + + // Create complex template with multiple data types + complexTemplateFile = createComplexTemplate(); + + // Create horizontal fill template + horizontalTemplateFile = createHorizontalTemplate(); + + // Create vertical fill template + verticalTemplateFile = createVerticalTemplate(); + + // Create multi-list template + multiListTemplateFile = createMultiListTemplate(); + } + + private String createSimpleTemplate() { + String templatePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.SMALL, "SimpleTemplate"); + + // Create a simple template with placeholders + List> templateData = Arrays.asList( + new HashMap() { + { + put("name", "Simple Fill Test"); + put("date", "2023-01-01"); + put("version", "1.0"); + } + }, + new HashMap() { + { + put("description", "This is a simple fill test"); + put("author", "Test Author"); + 
put("status", "Active"); + } + }); + + try { + // Write template structure + EasyExcel.write(templatePath).sheet("Template").doWrite(templateData); + + logger.debug("Created simple template: {}", templatePath); + return templatePath; + } catch (Exception e) { + throw new RuntimeException("Failed to create simple template", e); + } + } + + private String createComplexTemplate() { + String templatePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, "ComplexTemplate"); + + // Create a more complex template with data list placeholders + List> templateData = Arrays.asList( + new HashMap() { + { + put("A", "{reportTitle}"); + put("B", ""); + put("C", ""); + } + }, + new HashMap() { + { + put("A", "Generated on: {generatedDate}"); + put("B", "Time: {generatedTime}"); + put("C", ""); + } + }, + new HashMap() { + { + put("A", "Total Records: {totalRecords}"); + put("B", "Avg Time: {avgProcessingTime}ms"); + put("C", "Max Memory: {maxMemoryUsage}"); + } + }, + new HashMap() { + { + put("A", ""); + put("B", ""); + put("C", ""); + } + }, + new HashMap() { + { + put("A", "Summary: {summary}"); + put("B", ""); + put("C", ""); + } + }, + new HashMap() { + { + put("A", ""); + put("B", ""); + put("C", ""); + } + }, + new HashMap() { + { + put("A", "ID"); + put("B", "String Data"); + put("C", "Value"); + } + }, + new HashMap() { + { + put("A", "{.id}"); + put("B", "{.stringData}"); + put("C", "{.intValue}"); + } + }); + + try { + EasyExcel.write(templatePath).sheet("ComplexTemplate").doWrite(templateData); + + logger.debug("Created complex template: {}", templatePath); + return templatePath; + } catch (Exception e) { + throw new RuntimeException("Failed to create complex template", e); + } + } + + private String createHorizontalTemplate() { + String templatePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.MEDIUM, + "HorizontalTemplate"); 
+ + // Create horizontal fill template + List> templateData = Arrays.asList( + new HashMap() { + { + put("A", "Horizontal Fill Demo"); + put("B", ""); + put("C", ""); + } + }, + new HashMap() { + { + put("A", "{.id}"); + put("B", "{.stringData}"); + put("C", "{.intValue}"); + } + }); + + try { + EasyExcel.write(templatePath).sheet("HorizontalTemplate").doWrite(templateData); + + logger.debug("Created horizontal template: {}", templatePath); + return templatePath; + } catch (Exception e) { + throw new RuntimeException("Failed to create horizontal template", e); + } + } + + private String createVerticalTemplate() { + String templatePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, "VerticalTemplate"); + + // Create vertical fill template + List> templateData = Arrays.asList( + new HashMap() { + { + put("A", "Dynamic Fill Test"); + put("B", "Status"); + put("C", "Priority"); + } + }, + new HashMap() { + { + put("A", "{.id}"); + put("B", "{.status}"); + put("C", "{.priority}"); + } + }); + + try { + EasyExcel.write(templatePath).sheet("VerticalTemplate").doWrite(templateData); + + logger.debug("Created vertical template: {}", templatePath); + return templatePath; + } catch (Exception e) { + throw new RuntimeException("Failed to create vertical template", e); + } + } + + private String createMultiListTemplate() { + String templatePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.LARGE, "MultiListTemplate"); + + // Create multi-list template + List> templateData = Arrays.asList( + new HashMap() { + { + put("Report", "Performance Report"); + put("Date", "{date}"); + put("Version", "{version}"); + } + }, + new HashMap() { + { + put("Metric", "Value"); + put("Status", "Threshold"); + put("Notes", "Comments"); + } + }); + + try { + EasyExcel.write(templatePath).sheet("MultiListTemplate").doWrite(templateData); + + 
logger.debug("Created multi-list template: {}", templatePath); + return templatePath; + } catch (Exception e) { + throw new RuntimeException("Failed to create multi-list template", e); + } + } + + private void setupFillConfigurations() { + verticalConfig = + FillConfig.builder().direction(WriteDirectionEnum.VERTICAL).build(); + + horizontalConfig = + FillConfig.builder().direction(WriteDirectionEnum.HORIZONTAL).build(); + + forceNewRowConfig = FillConfig.builder() + .direction(WriteDirectionEnum.VERTICAL) + .forceNewRow(true) + .build(); + } + + // Simple fill benchmarks + @Benchmark + public void fillSimpleMap(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.SMALL, "FillSimpleMap"); + + EasyExcel.write(outputFile).withTemplate(simpleTemplateFile).sheet().doFill(simpleMap); + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void fillSingleObject(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.SMALL, "FillSingleObject"); + + EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(simpleTemplateFile) + .sheet() + .doFill(singleData); + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + // List fill benchmarks - different sizes + @Benchmark + public void fillSmallList(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.SMALL, "FillSmallList"); + + EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(complexTemplateFile) + .sheet() + .doFill(smallData); + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } 
+ + @Benchmark + public void fillMediumList(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, "FillMediumList"); + + EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(complexTemplateFile) + .sheet() + .doFill(mediumData); + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void fillLargeList(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.LARGE, "FillLargeList"); + + EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(complexTemplateFile) + .sheet() + .doFill(largeData); + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + // Directional fill benchmarks + @Benchmark + public void fillHorizontal(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, "FillHorizontal"); + + try (ExcelWriter excelWriter = EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(horizontalTemplateFile) + .build()) { + WriteSheet writeSheet = EasyExcel.writerSheet().build(); + excelWriter.fill(mediumData, horizontalConfig, writeSheet); + } + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void fillVertical(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, "FillVertical"); + + try (ExcelWriter excelWriter = EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(verticalTemplateFile) + .build()) { + WriteSheet writeSheet = 
EasyExcel.writerSheet().build(); + excelWriter.fill(mediumData, verticalConfig, writeSheet); + } + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void fillForceNewRow(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, "FillForceNewRow"); + + try (ExcelWriter excelWriter = EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(verticalTemplateFile) + .build()) { + WriteSheet writeSheet = EasyExcel.writerSheet().build(); + excelWriter.fill(mediumData, forceNewRowConfig, writeSheet); + } + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + // Multi-list fill benchmarks + @Benchmark + public void fillMultipleLists(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, "FillMultipleLists"); + + try (ExcelWriter excelWriter = + EasyExcel.write(outputFile).withTemplate(multiListTemplateFile).build()) { + WriteSheet writeSheet = EasyExcel.writerSheet().build(); + + // Fill multiple lists with different prefixes + excelWriter.fill(new FillWrapper("data1", smallData), writeSheet); + excelWriter.fill(new FillWrapper("data2", mediumData), writeSheet); + + // Fill summary data + Map summary = new HashMap<>(); + summary.put("total", smallData.size() + mediumData.size()); + summary.put("date", LocalDate.now().toString()); + excelWriter.fill(summary, writeSheet); + } + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + // Complex fill scenarios + @Benchmark + public void fillComplexMixed(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, 
BenchmarkConfiguration.DatasetSize.LARGE, "FillComplexMixed"); + + try (ExcelWriter excelWriter = + EasyExcel.write(outputFile).withTemplate(complexTemplateFile).build()) { + WriteSheet writeSheet = EasyExcel.writerSheet().build(); + + // Fill header variables + excelWriter.fill(complexMap, writeSheet); + + // Fill data list + excelWriter.fill(largeData, forceNewRowConfig, writeSheet); + } + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + // Streaming fill benchmark + @Benchmark + public void fillStreaming(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.LARGE, "FillStreaming"); + + try (ExcelWriter excelWriter = EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(complexTemplateFile) + .build()) { + WriteSheet writeSheet = EasyExcel.writerSheet().build(); + + // Fill in batches to test streaming behavior + int batchSize = 1000; + for (int i = 0; i < largeData.size(); i += batchSize) { + int endIndex = Math.min(i + batchSize, largeData.size()); + List batch = largeData.subList(i, endIndex); + excelWriter.fill(batch, verticalConfig, writeSheet); + } + } + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } + + // Memory efficient fill benchmark + @Benchmark + public void fillMemoryEfficient(Blackhole blackhole) throws Exception { + String outputFile = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.LARGE, + "FillMemoryEfficient"); + + try (ExcelWriter excelWriter = EasyExcel.write(outputFile, BenchmarkData.class) + .withTemplate(complexTemplateFile) + .build()) { + WriteSheet writeSheet = EasyExcel.writerSheet().build(); + + // Generate and fill data on-the-fly to test memory efficiency + DataGenerator.DataStream dataStream = + 
dataGenerator.generateStreamingData(BenchmarkConfiguration.DatasetSize.LARGE.getRowCount()); + + List batch = new ArrayList<>(); + int batchSize = 500; + + for (BenchmarkData data : dataStream) { + batch.add(data); + + if (batch.size() >= batchSize) { + excelWriter.fill(batch, verticalConfig, writeSheet); + batch.clear(); + + // Periodic GC to test memory efficiency + if (batch.size() % (batchSize * 10) == 0) { + forceGC(); + } + } + } + + // Fill remaining data + if (!batch.isEmpty()) { + excelWriter.fill(batch, verticalConfig, writeSheet); + } + } + + long fileSize = BenchmarkFileUtil.getFileSize(outputFile); + consumeData(fileSize, blackhole); + } +} diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/ReadBenchmark.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/ReadBenchmark.java new file mode 100644 index 000000000..dc76bb606 --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/ReadBenchmark.java @@ -0,0 +1,449 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.fesod.sheet.benchmark.operations; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.fesod.sheet.EasyExcel; +import org.apache.fesod.sheet.benchmark.core.AbstractBenchmark; +import org.apache.fesod.sheet.benchmark.core.BenchmarkConfiguration; +import org.apache.fesod.sheet.benchmark.data.BenchmarkData; +import org.apache.fesod.sheet.benchmark.utils.BenchmarkFileUtil; +import org.apache.fesod.sheet.benchmark.utils.DataGenerator; +import org.apache.fesod.sheet.context.AnalysisContext; +import org.apache.fesod.sheet.read.listener.ReadListener; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Comprehensive benchmarks for FastExcel read operations + */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 3, time = 2) +@Measurement(iterations = 5, time = 3) +@Fork(1) +public class ReadBenchmark extends AbstractBenchmark { + + // Test files for different sizes and formats + private String xlsxSmallFile; + private String xlsxMediumFile; + private String xlsEXTRA_LARGEFile; + private String xlsxExtraLargeFile; + + private String csvSmallFile; + private String csvMediumFile; + private String csvLargeFile; + private String csvExtraLargeFile; + + // Listeners for different testing scenarios + private CountingReadListener countingListener; + private CollectingReadListener collectingListener; + private ProcessingReadListener 
processingListener; + + @Override + protected void setupBenchmark() throws Exception { + logger.info("Setting up read benchmark test files..."); + + // Generate test files for all sizes and formats + generateTestFiles(); + + // Initialize listeners + countingListener = new CountingReadListener(); + collectingListener = new CollectingReadListener(); + processingListener = new ProcessingReadListener(); + + logger.info("Read benchmark setup completed"); + } + + @Override + protected void tearDownBenchmark() throws Exception { + // Clean up temporary files + BenchmarkFileUtil.cleanupTempFiles(); + logger.info("Read benchmark cleanup completed"); + } + + @Override + protected void setupIteration0() throws Exception { + // Reset listeners for each iteration + countingListener.reset(); + collectingListener.reset(); + processingListener.reset(); + } + + private void generateTestFiles() { + DataGenerator generator = new DataGenerator(); + + // Generate XLSX files + xlsxSmallFile = generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.SMALL, generator); + xlsxMediumFile = generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, generator); + xlsEXTRA_LARGEFile = generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.LARGE, generator); + xlsxExtraLargeFile = generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.EXTRA_LARGE, generator); + + // Generate CSV files + csvSmallFile = generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat.CSV, BenchmarkConfiguration.DatasetSize.SMALL, generator); + csvMediumFile = generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat.CSV, BenchmarkConfiguration.DatasetSize.MEDIUM, generator); + csvLargeFile = generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat.CSV, BenchmarkConfiguration.DatasetSize.LARGE, generator); + 
csvExtraLargeFile = generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat.CSV, BenchmarkConfiguration.DatasetSize.EXTRA_LARGE, generator); + } + + private String generateAndWriteTestFile( + BenchmarkConfiguration.FileFormat format, + BenchmarkConfiguration.DatasetSize size, + DataGenerator generator) { + String filePath = BenchmarkFileUtil.getTempFilePath(format, size, "ReadBenchmark"); + List data = generator.generateData(size); + + try { + EasyExcel.write(filePath, BenchmarkData.class) + .sheet("BenchmarkData") + .doWrite(data); + + logger.debug( + "Generated test file: {} ({} rows, {})", + filePath, + size.getRowCount(), + BenchmarkFileUtil.getFileSizeFormatted(filePath)); + return filePath; + } catch (Exception e) { + throw new RuntimeException("Failed to generate test file: " + filePath, e); + } + } + + // XLSX Read Benchmarks - Different sizes + @Benchmark + public void readXlsxSmall(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsxSmallFile, BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + @Benchmark + public void readXlsxMedium(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsxMediumFile, BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + @Benchmark + public void readXlsEXTRA_LARGE(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsEXTRA_LARGEFile, BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + @Benchmark + public void readXlsxExtraLarge(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsxExtraLargeFile, BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + // CSV Read Benchmarks - Different sizes + @Benchmark + public void readCsvSmall(Blackhole blackhole) throws Exception { + EasyExcel.read(csvSmallFile, 
BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + @Benchmark + public void readCsvMedium(Blackhole blackhole) throws Exception { + EasyExcel.read(csvMediumFile, BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + @Benchmark + public void readCsvLarge(Blackhole blackhole) throws Exception { + EasyExcel.read(csvLargeFile, BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + @Benchmark + public void readCsvExtraLarge(Blackhole blackhole) throws Exception { + EasyExcel.read(csvExtraLargeFile, BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + // Stream reading benchmarks + @Benchmark + public void readXlsEXTRA_LARGEWithStreaming(Blackhole blackhole) throws Exception { + try (FileInputStream fis = new FileInputStream(xlsEXTRA_LARGEFile)) { + EasyExcel.read(fis, BenchmarkData.class, countingListener).sheet().doRead(); + consumeData(countingListener.getCount(), blackhole); + } + } + + @Benchmark + public void readCsvLargeWithStreaming(Blackhole blackhole) throws Exception { + try (FileInputStream fis = new FileInputStream(csvLargeFile)) { + EasyExcel.read(fis, BenchmarkData.class, countingListener).sheet().doRead(); + consumeData(countingListener.getCount(), blackhole); + } + } + + // Different listener types benchmarks + @Benchmark + public void readXlsEXTRA_LARGECountingOnly(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsEXTRA_LARGEFile, BenchmarkData.class, countingListener) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + @Benchmark + public void readXlsEXTRA_LARGECollecting(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsEXTRA_LARGEFile, BenchmarkData.class, collectingListener) + .sheet() + .doRead(); + 
consumeData(collectingListener.getData().size(), blackhole); + } + + @Benchmark + public void readXlsEXTRA_LARGEProcessing(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsEXTRA_LARGEFile, BenchmarkData.class, processingListener) + .sheet() + .doRead(); + consumeData(processingListener.getProcessedCount(), blackhole); + } + + // Head configuration benchmarks + @Benchmark + public void readXlsEXTRA_LARGEWithHeadRowNumber(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsEXTRA_LARGEFile, BenchmarkData.class, countingListener) + .headRowNumber(1) + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + @Benchmark + public void readXlsEXTRA_LARGESkipRows(Blackhole blackhole) throws Exception { + EasyExcel.read(xlsEXTRA_LARGEFile, BenchmarkData.class, countingListener) + .headRowNumber(2) // Skip first row + .sheet() + .doRead(); + consumeData(countingListener.getCount(), blackhole); + } + + // Multiple sheets reading (using same file) + @Benchmark + public void readXlsxMultipleSheets(Blackhole blackhole) throws Exception { + for (int i = 0; i < 3; i++) { + EasyExcel.read(xlsxMediumFile, BenchmarkData.class, countingListener) + .sheet(0) // Always read first sheet since our test files have only one + .doRead(); + } + consumeData(countingListener.getCount(), blackhole); + } + + // Memory efficient reading with limited collections + @Benchmark + public void readXlsEXTRA_LARGEMemoryEfficient(Blackhole blackhole) throws Exception { + LimitedCollectingReadListener limitedListener = new LimitedCollectingReadListener(1000); + EasyExcel.read(xlsEXTRA_LARGEFile, BenchmarkData.class, limitedListener) + .sheet() + .doRead(); + consumeData(limitedListener.getData().size(), blackhole); + } + + // Error handling benchmark + @Benchmark + public void readXlsxWithErrorHandling(Blackhole blackhole) throws Exception { + ErrorHandlingReadListener errorListener = new ErrorHandlingReadListener(); + try { + EasyExcel.read(xlsEXTRA_LARGEFile, 
BenchmarkData.class, errorListener) + .sheet() + .doRead(); + } catch (Exception e) { + // Expected for some error scenarios + } + consumeData(errorListener.getProcessedCount(), blackhole); + } + + // Read Listeners + private static class CountingReadListener implements ReadListener { + private final AtomicLong count = new AtomicLong(0); + + @Override + public void invoke(BenchmarkData data, AnalysisContext context) { + count.incrementAndGet(); + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + // Nothing to do + } + + public long getCount() { + return count.get(); + } + + public void reset() { + count.set(0); + } + } + + private static class CollectingReadListener implements ReadListener { + private final List data = new ArrayList<>(); + + @Override + public void invoke(BenchmarkData item, AnalysisContext context) { + data.add(item); + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + // Nothing to do + } + + public List getData() { + return data; + } + + public void reset() { + data.clear(); + } + } + + private static class ProcessingReadListener implements ReadListener { + private final AtomicLong processedCount = new AtomicLong(0); + + @Override + public void invoke(BenchmarkData data, AnalysisContext context) { + // Simulate some processing + if (data.getStringData() != null && data.getStringData().length() > 0) { + String processed = data.getStringData().toUpperCase(); + // Simulate validation + if (data.getIntValue() != null && data.getIntValue() > 0) { + processedCount.incrementAndGet(); + } + } + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + // Nothing to do + } + + public long getProcessedCount() { + return processedCount.get(); + } + + public void reset() { + processedCount.set(0); + } + } + + private static class LimitedCollectingReadListener implements ReadListener { + private final List data = new ArrayList<>(); + private final int maxSize; + + public 
LimitedCollectingReadListener(int maxSize) { + this.maxSize = maxSize; + } + + @Override + public void invoke(BenchmarkData item, AnalysisContext context) { + if (data.size() < maxSize) { + data.add(item); + } + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + // Nothing to do + } + + public List getData() { + return data; + } + + public void reset() { + data.clear(); + } + } + + private static class ErrorHandlingReadListener implements ReadListener { + private final AtomicLong processedCount = new AtomicLong(0); + private final AtomicLong errorCount = new AtomicLong(0); + + @Override + public void invoke(BenchmarkData data, AnalysisContext context) { + try { + // Simulate processing that might fail + if (data.getStringData() != null) { + processedCount.incrementAndGet(); + } + } catch (Exception e) { + errorCount.incrementAndGet(); + } + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + // Nothing to do + } + + public long getProcessedCount() { + return processedCount.get(); + } + + public long getErrorCount() { + return errorCount.get(); + } + + public void reset() { + processedCount.set(0); + errorCount.set(0); + } + } +} diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/WriteBenchmark.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/WriteBenchmark.java new file mode 100644 index 000000000..cc8481d77 --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/operations/WriteBenchmark.java @@ -0,0 +1,444 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fesod.sheet.benchmark.operations; + +import java.io.FileOutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.fesod.sheet.EasyExcel; +import org.apache.fesod.sheet.ExcelWriter; +import org.apache.fesod.sheet.benchmark.core.AbstractBenchmark; +import org.apache.fesod.sheet.benchmark.core.BenchmarkConfiguration; +import org.apache.fesod.sheet.benchmark.data.BenchmarkData; +import org.apache.fesod.sheet.benchmark.utils.BenchmarkFileUtil; +import org.apache.fesod.sheet.benchmark.utils.DataGenerator; +import org.apache.fesod.sheet.write.metadata.WriteSheet; +import org.apache.fesod.sheet.write.metadata.WriteTable; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Comprehensive benchmarks for FastExcel write operations + */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 3, time = 2) +@Measurement(iterations = 5, time = 3) +@Fork(1) +public class WriteBenchmark extends AbstractBenchmark { + + // Test data for different sizes + private List smallData; + private List mediumData; + 
private List largeData; + private List extraLargeData; + + // Batch data for streaming tests + private List> smallBatches; + private List> mediumBatches; + private List> largeBatches; + + // Data generator + private DataGenerator dataGenerator; + + @Override + protected void setupBenchmark() throws Exception { + logger.info("Setting up write benchmark test data..."); + + dataGenerator = new DataGenerator(); + + // Generate test data sets + generateTestData(); + + logger.info("Write benchmark setup completed"); + } + + @Override + protected void tearDownBenchmark() throws Exception { + // Clean up temporary files + BenchmarkFileUtil.cleanupTempFiles(); + logger.info("Write benchmark cleanup completed"); + } + + private void generateTestData() { + // Generate data for different sizes + smallData = dataGenerator.generateData(BenchmarkConfiguration.DatasetSize.SMALL); + mediumData = dataGenerator.generateData(BenchmarkConfiguration.DatasetSize.MEDIUM); + largeData = dataGenerator.generateData(BenchmarkConfiguration.DatasetSize.LARGE); + extraLargeData = dataGenerator.generateData(BenchmarkConfiguration.DatasetSize.EXTRA_LARGE); + + // Generate batch data for streaming + smallBatches = dataGenerator.generateDataInBatches(BenchmarkConfiguration.DatasetSize.SMALL.getRowCount(), 100); + mediumBatches = + dataGenerator.generateDataInBatches(BenchmarkConfiguration.DatasetSize.MEDIUM.getRowCount(), 1000); + largeBatches = + dataGenerator.generateDataInBatches(BenchmarkConfiguration.DatasetSize.LARGE.getRowCount(), 5000); + + logger.debug( + "Generated test data - Small: {}, Medium: {}, Large: {}, Extra Large: {} rows", + smallData.size(), + mediumData.size(), + largeData.size(), + extraLargeData.size()); + } + + // XLSX Write Benchmarks - Different sizes + @Benchmark + public void writeXlsxSmall(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.SMALL, 
"WriteBenchmark"); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("BenchmarkData").doWrite(smallData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsxMedium(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.MEDIUM, "WriteBenchmark"); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("BenchmarkData").doWrite(mediumData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsEXTRA_LARGE(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, BenchmarkConfiguration.DatasetSize.LARGE, "WriteBenchmark"); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("BenchmarkData").doWrite(largeData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsxExtraLarge(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.EXTRA_LARGE, + "WriteBenchmark"); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("BenchmarkData").doWrite(extraLargeData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsSmall(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLS, BenchmarkConfiguration.DatasetSize.SMALL, "WriteBenchmark"); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("BenchmarkData").doWrite(smallData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + 
@Benchmark + public void writeCsvMedium(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.CSV, BenchmarkConfiguration.DatasetSize.MEDIUM, "WriteBenchmark"); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("BenchmarkData").doWrite(mediumData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeCsvLarge(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.CSV, BenchmarkConfiguration.DatasetSize.LARGE, "WriteBenchmark"); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("BenchmarkData").doWrite(largeData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeCsvExtraLarge(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.CSV, + BenchmarkConfiguration.DatasetSize.EXTRA_LARGE, + "WriteBenchmark"); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("BenchmarkData").doWrite(extraLargeData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + // Streaming write benchmarks using ExcelWriter + @Benchmark + public void writeXlsEXTRA_LARGEStreaming(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.LARGE, + "StreamingWriteBenchmark"); + + try (ExcelWriter excelWriter = + EasyExcel.write(filePath, BenchmarkData.class).build()) { + WriteSheet writeSheet = EasyExcel.writerSheet("StreamingData").build(); + + for (List batch : largeBatches) { + excelWriter.write(batch, writeSheet); + } + } + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } 
+ + @Benchmark + public void writeCsvLargeStreaming(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.CSV, + BenchmarkConfiguration.DatasetSize.LARGE, + "StreamingWriteBenchmark"); + + try (ExcelWriter excelWriter = + EasyExcel.write(filePath, BenchmarkData.class).build()) { + WriteSheet writeSheet = EasyExcel.writerSheet("StreamingData").build(); + + for (List batch : largeBatches) { + excelWriter.write(batch, writeSheet); + } + } + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + // Multiple sheets writing + @Benchmark + public void writeXlsxMultipleSheets(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.MEDIUM, + "MultiSheetWriteBenchmark"); + + try (ExcelWriter excelWriter = + EasyExcel.write(filePath, BenchmarkData.class).build()) { + // Write to 3 different sheets + for (int i = 0; i < 3; i++) { + WriteSheet writeSheet = + EasyExcel.writerSheet(i, "Sheet" + (i + 1)).build(); + excelWriter.write(mediumData, writeSheet); + } + } + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsxToOutputStream(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.MEDIUM, + "OutputStreamWriteBenchmark"); + + try (FileOutputStream fos = new FileOutputStream(filePath)) { + EasyExcel.write(fos, BenchmarkData.class).sheet("OutputStreamData").doWrite(mediumData); + } + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsxTableFormat(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + 
BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.LARGE, + "TableFormatWriteBenchmark"); + + try (ExcelWriter excelWriter = + EasyExcel.write(filePath, BenchmarkData.class).build()) { + WriteSheet writeSheet = EasyExcel.writerSheet("TableData").build(); + WriteTable writeTable = EasyExcel.writerTable(0).build(); + + // Write data in table format + for (List batch : largeBatches) { + excelWriter.write(batch, writeSheet, writeTable); + } + } + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsxMemoryEfficientBatches(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.LARGE, + "MemoryEfficientWriteBenchmark"); + + try (ExcelWriter excelWriter = + EasyExcel.write(filePath, BenchmarkData.class).build()) { + WriteSheet writeSheet = EasyExcel.writerSheet("BatchData").build(); + + // Write in small batches to reduce memory usage + int batchSize = 1000; + for (int i = 0; i < largeData.size(); i += batchSize) { + int endIndex = Math.min(i + batchSize, largeData.size()); + List batch = largeData.subList(i, endIndex); + excelWriter.write(batch, writeSheet); + } + } + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + // Dynamic data generation and writing + @Benchmark + public void writeXlsxDynamicGeneration(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.MEDIUM, + "DynamicWriteBenchmark"); + + try (ExcelWriter excelWriter = + EasyExcel.write(filePath, BenchmarkData.class).build()) { + WriteSheet writeSheet = EasyExcel.writerSheet("DynamicData").build(); + + // Generate and write data on-the-fly + DataGenerator.DataStream dataStream = + 
dataGenerator.generateStreamingData(BenchmarkConfiguration.DatasetSize.MEDIUM.getRowCount()); + + List batch = new ArrayList<>(); + int batchSize = 1000; + + for (BenchmarkData data : dataStream) { + batch.add(data); + + if (batch.size() >= batchSize) { + excelWriter.write(batch, writeSheet); + batch.clear(); + } + } + + // Write remaining data + if (!batch.isEmpty()) { + excelWriter.write(batch, writeSheet); + } + } + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + // Write with different data characteristics + @Benchmark + public void writeXlsEXTRA_LARGEStrings(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.MEDIUM, + "LargeStringsWriteBenchmark"); + + List largeStringData = dataGenerator.generateDataWithCharacteristics( + BenchmarkConfiguration.DatasetSize.MEDIUM.getRowCount(), + DataGenerator.DataCharacteristics.defaults().withLargeStrings()); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("LargeStringData").doWrite(largeStringData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsxRepeatedValues(Blackhole blackhole) throws Exception { + String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.MEDIUM, + "RepeatedValuesWriteBenchmark"); + + List repeatedData = dataGenerator.generateDataWithCharacteristics( + BenchmarkConfiguration.DatasetSize.MEDIUM.getRowCount(), + DataGenerator.DataCharacteristics.defaults().withRepeatedValues()); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("RepeatedData").doWrite(repeatedData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + @Benchmark + public void writeXlsxNullValues(Blackhole blackhole) throws Exception { 
+ String filePath = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.MEDIUM, + "NullValuesWriteBenchmark"); + + List nullData = dataGenerator.generateDataWithCharacteristics( + BenchmarkConfiguration.DatasetSize.MEDIUM.getRowCount(), + DataGenerator.DataCharacteristics.defaults().withNullValues()); + + EasyExcel.write(filePath, BenchmarkData.class).sheet("NullData").doWrite(nullData); + + long fileSize = BenchmarkFileUtil.getFileSize(filePath); + consumeData(fileSize, blackhole); + } + + // Concurrent writing simulation + @Benchmark + public void writeXlsxConcurrentSimulation(Blackhole blackhole) throws Exception { + // Simulate concurrent writing by writing multiple files + String[] filePaths = new String[3]; + List> dataSets = BenchmarkFileUtil.listOf(smallData, mediumData, smallData); + + for (int i = 0; i < 3; i++) { + filePaths[i] = BenchmarkFileUtil.getTempFilePath( + BenchmarkConfiguration.FileFormat.XLSX, + BenchmarkConfiguration.DatasetSize.SMALL, + "ConcurrentWriteBenchmark_" + i); + + EasyExcel.write(filePaths[i], BenchmarkData.class) + .sheet("ConcurrentData" + i) + .doWrite(dataSets.get(i)); + } + + long totalSize = 0; + for (String filePath : filePaths) { + totalSize += BenchmarkFileUtil.getFileSize(filePath); + } + + consumeData(totalSize, blackhole); + } +} diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/BenchmarkFileUtil.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/BenchmarkFileUtil.java new file mode 100644 index 000000000..1a51dcfac --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/BenchmarkFileUtil.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fesod.sheet.benchmark.utils; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.fesod.sheet.benchmark.core.BenchmarkConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class for managing benchmark test files + */ +public class BenchmarkFileUtil { + + private static final Logger logger = LoggerFactory.getLogger(BenchmarkFileUtil.class); + + private static final String TEST_DATA_DIR = "target/benchmark-testdata"; + + /** + * Create test data directory if it doesn't exist + */ + public static void createTestDataDirectory() { + try { + Path testDataPath = Paths.get(TEST_DATA_DIR); + if (!Files.exists(testDataPath)) { + Files.createDirectories(testDataPath); + logger.debug("Created test data directory: {}", testDataPath); + } + } catch (IOException e) { + logger.error("Failed to create test data directory", e); + throw new RuntimeException("Failed to create test data directory", e); + } + } + + /** + * Generate a temporary file path for benchmarks + */ + public static String getTempFilePath( + BenchmarkConfiguration.FileFormat format, BenchmarkConfiguration.DatasetSize size, String benchmarkName) { + createTestDataDirectory(); + + String 
fileName = String.format( + "temp_%s_%s_%s_%d.%s", + benchmarkName, + size.getLabel(), + format.name().toLowerCase(), + System.currentTimeMillis(), + format.getExtension()); + return TEST_DATA_DIR + File.separator + fileName; + } + + /** + * Clean up temporary files created during benchmarks + */ + public static void cleanupTempFiles() { + try { + Path testDataPath = Paths.get(TEST_DATA_DIR); + if (Files.exists(testDataPath)) { + Files.walk(testDataPath) + .filter(path -> path.getFileName().toString().startsWith("temp_")) + .forEach(path -> { + try { + Files.deleteIfExists(path); + logger.debug("Deleted temp file: {}", path); + } catch (IOException e) { + logger.warn("Failed to delete temp file: {}", path, e); + } + }); + } + } catch (IOException e) { + logger.warn("Failed to cleanup temp files", e); + } + } + + /** + * Get file size in bytes + */ + public static long getFileSize(String filePath) { + try { + return Files.size(Paths.get(filePath)); + } catch (IOException e) { + logger.warn("Failed to get file size for: {}", filePath, e); + return 0; + } + } + + /** + * Get file size in human readable format + */ + public static String getFileSizeFormatted(String filePath) { + long bytes = getFileSize(filePath); + return formatBytes(bytes); + } + + /** + * Format bytes into human readable format + */ + public static String formatBytes(long bytes) { + if (bytes < 1024) return bytes + " B"; + int exp = (int) (Math.log(bytes) / Math.log(1024)); + String pre = "KMGTPE".charAt(exp - 1) + ""; + return String.format("%.1f %sB", bytes / Math.pow(1024, exp), pre); + } + + /** + * Create a test file with the specified name + */ + public static File createTestFile(String fileName) { + createTestDataDirectory(); + return new File(TEST_DATA_DIR, fileName); + } + + /** + * Read a string from a file + * @param path the path to the file + * @return the content of the file as a string + * @throws IOException if an I/O error occurs + */ + public static String readString(Path path) 
throws IOException { + return new String(Files.readAllBytes(path)); + } + + /** + * Create a list with the specified elements + * @param elements the elements to include in the list + * @return a list containing the specified elements + */ + @SafeVarargs + public static List listOf(T... elements) { + List list = new ArrayList<>(elements.length); + list.addAll(Arrays.asList(elements)); + return list; + } +} diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/DataGenerator.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/DataGenerator.java new file mode 100644 index 000000000..d4a7da122 --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/DataGenerator.java @@ -0,0 +1,441 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.fesod.sheet.benchmark.utils; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; +import org.apache.fesod.sheet.benchmark.core.BenchmarkConfiguration; +import org.apache.fesod.sheet.benchmark.data.BenchmarkData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +
/**
 * Utility class for generating test data for benchmarks.
 *
 * <p>An instance created with {@link #DataGenerator(long)} draws every value from one seeded
 * {@link Random}; an instance created with {@link #DataGenerator()} draws from the calling
 * thread's {@link ThreadLocalRandom} on each use.
 */
public class DataGenerator {

    private static final Logger logger = LoggerFactory.getLogger(DataGenerator.class);

    // Predefined data sets for realistic data generation
    private static final String[] CATEGORIES = {
        "Electronics",
        "Books",
        "Clothing",
        "Home & Garden",
        "Sports",
        "Automotive",
        "Health & Beauty",
        "Toys & Games",
        "Food & Beverage",
        "Office Supplies"
    };

    private static final String[] STATUSES = {
        "Active", "Inactive", "Pending", "Processing", "Completed", "Cancelled", "On Hold"
    };

    private static final String[] SAMPLE_WORDS = {
        "Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit", "sed",
        "do", "eiusmod", "tempor", "incididunt", "ut", "labore", "et", "dolore", "magna",
        "aliqua", "enim", "ad", "minim", "veniam", "quis", "nostrud", "exercitation", "ullamco",
        "laboris", "nisi", "aliquip", "ex", "ea", "commodo"
    };

    // Seeded source, or null when values come from the calling thread's ThreadLocalRandom
    private final Random seededRandom;

    /** Creates a generator whose values are not reproducible between runs. */
    public DataGenerator() {
        // A ThreadLocalRandom instance must not be cached in a field: it belongs to the thread
        // that called current(), and this generator (e.g. the static default one) may be used
        // from other threads. It is looked up per use in rng() instead.
        this.seededRandom = null;
    }

    /**
     * Creates a generator backed by {@code new Random(seed)}.
     *
     * @param seed seed for the underlying {@link Random}
     */
    public DataGenerator(long seed) {
        this.seededRandom = new Random(seed);
    }

    /** Returns the random source for the current call: the seeded one, else the caller's thread-local one. */
    private Random rng() {
        return seededRandom != null ? seededRandom : ThreadLocalRandom.current();
    }

    /**
     * Generate a list of benchmark data with the specified size.
     *
     * @param size dataset size whose row count is used
     * @return a new list of {@code size.getRowCount()} rows
     */
    public List<BenchmarkData> generateData(BenchmarkConfiguration.DatasetSize size) {
        return generateData(size.getRowCount());
    }

    /**
     * Generate a list of benchmark data with the specified row count. Row ids run from 1 to
     * {@code rowCount}.
     *
     * @param rowCount number of rows to generate
     * @return a new list of {@code rowCount} rows
     */
    public List<BenchmarkData> generateData(int rowCount) {
        logger.info("Generating {} rows of benchmark data", rowCount);

        List<BenchmarkData> data = new ArrayList<>(rowCount);
        long startTime = System.currentTimeMillis();

        for (int i = 0; i < rowCount; i++) {
            data.add(generateSingleRow(i + 1));

            // Log progress for large datasets
            if (rowCount > 10000 && i > 0 && i % 10000 == 0) {
                logger.debug("Generated {} rows", i);
            }
        }

        long duration = System.currentTimeMillis() - startTime;
        // 1000L forces long arithmetic; rowCount * 1000 overflows int above ~2.1M rows
        logger.info(
                "Generated {} rows in {} ms ({} rows/sec)",
                rowCount,
                duration,
                duration > 0 ? (rowCount * 1000L / duration) : "N/A");

        return data;
    }

    /**
     * Generate benchmark data as a list of batches. Row ids run from 1 to {@code totalRows}
     * across the batches; the last batch may be shorter than {@code batchSize}.
     *
     * @param totalRows total number of rows across all batches
     * @param batchSize maximum rows per batch; must be positive when {@code totalRows > 0}
     * @return the batches in generation order (empty when {@code totalRows <= 0})
     * @throws IllegalArgumentException if rows are requested with a non-positive batch size
     */
    public List<List<BenchmarkData>> generateDataInBatches(int totalRows, int batchSize) {
        logger.info("Generating {} rows in batches of {}", totalRows, batchSize);

        // A batch size of 0 would never reduce remainingRows and loop forever
        if (totalRows > 0 && batchSize <= 0) {
            throw new IllegalArgumentException("batchSize must be positive, got " + batchSize);
        }

        List<List<BenchmarkData>> batches = new ArrayList<>();
        int remainingRows = totalRows;
        int currentBatch = 1;
        int startId = 1;

        while (remainingRows > 0) {
            int currentBatchSize = Math.min(batchSize, remainingRows);
            List<BenchmarkData> batch = new ArrayList<>(currentBatchSize);

            for (int i = 0; i < currentBatchSize; i++) {
                batch.add(generateSingleRow(startId + i));
            }

            batches.add(batch);
            remainingRows -= currentBatchSize;
            startId += currentBatchSize;

            logger.debug("Generated batch {} with {} rows", currentBatch, currentBatchSize);
            currentBatch++;
        }

        logger.info("Generated {} batches totaling {} rows", batches.size(), totalRows);
        return batches;
    }

    /**
     * Generate a single row of benchmark data with the given id and random values in every
     * other field.
     */
    private BenchmarkData generateSingleRow(long id) {
        Random random = rng();
        BenchmarkData data = new BenchmarkData();

        data.setId(id);
        data.setStringData(generateRandomString(10, 50));
        data.setIntValue(random.nextInt(1000000));
        data.setLongValue(random.nextLong());
        data.setDoubleValue(random.nextDouble() * 1000000);
        data.setBigDecimalValue(
                BigDecimal.valueOf(random.nextDouble() * 1000000).setScale(2, RoundingMode.HALF_UP));
        data.setBooleanFlag(random.nextBoolean());
        data.setDateValue(generateRandomDate());
        data.setDateTimeValue(generateRandomDateTime());
        data.setCategory(CATEGORIES[random.nextInt(CATEGORIES.length)]);
        data.setDescription(generateRandomDescription());
        data.setStatus(STATUSES[random.nextInt(STATUSES.length)]);
        data.setFloatValue(random.nextFloat() * 1000);
        data.setShortValue((short) random.nextInt(Short.MAX_VALUE));
        data.setByteValue((byte) random.nextInt(Byte.MAX_VALUE));
        data.setExtraData1(generateRandomString(5, 20));
        data.setExtraData2(generateRandomString(5, 20));
        data.setExtraData3(generateRandomString(5, 20));
        data.setExtraData4(generateRandomString(5, 20));
        data.setExtraData5(generateRandomString(5, 20));

        return data;
    }

    /**
     * Generate random string of lower-case letters and digits whose length lies in
     * {@code [minLength, maxLength]}.
     */
    private String generateRandomString(int minLength, int maxLength) {
        Random random = rng();
        int length = random.nextInt(maxLength - minLength + 1) + minLength;
        StringBuilder sb = new StringBuilder(length);

        for (int i = 0; i < length; i++) {
            if (random.nextBoolean()) {
                // Add random letter
                sb.append((char) ('a' + random.nextInt(26)));
            } else {
                // Add random digit
                sb.append((char) ('0' + random.nextInt(10)));
            }
        }

        return sb.toString();
    }

    /**
     * Generate random description of 3-10 space-separated sample words.
     */
    private String generateRandomDescription() {
        Random random = rng();
        int wordCount = random.nextInt(8) + 3; // 3-10 words
        StringBuilder sb = new StringBuilder();

        for (int i = 0; i < wordCount; i++) {
            if (i > 0) {
                sb.append(" ");
            }
            sb.append(SAMPLE_WORDS[random.nextInt(SAMPLE_WORDS.length)]);
        }

        return sb.toString();
    }

    /**
     * Generate random date within the last 5 years.
     */
    private LocalDate generateRandomDate() {
        LocalDate now = LocalDate.now();
        LocalDate fiveYearsAgo = now.minusYears(5);
        long daysBetween = java.time.temporal.ChronoUnit.DAYS.between(fiveYearsAgo, now);
        // The remainder keeps the magnitude below daysBetween; abs() then drops its sign
        long randomDays = rng().nextLong() % daysBetween;
        return fiveYearsAgo.plusDays(Math.abs(randomDays));
    }

    /**
     * Generate random datetime within the last year.
     */
    private LocalDateTime generateRandomDateTime() {
        LocalDateTime now = LocalDateTime.now();
        LocalDateTime oneYearAgo = now.minusYears(1);
        long secondsBetween = java.time.temporal.ChronoUnit.SECONDS.between(oneYearAgo, now);
        // Same remainder-then-abs scheme as generateRandomDate()
        long randomSeconds = rng().nextLong() % secondsBetween;
        return oneYearAgo.plusSeconds(Math.abs(randomSeconds));
    }

    /**
     * Generate memory-efficient streaming data: rows are produced one at a time during
     * iteration instead of being collected up front.
     *
     * @param totalRows number of rows each iteration yields
     * @return an iterable view backed by this generator
     */
    public DataStream generateStreamingData(int totalRows) {
        return new DataStream(totalRows, this);
    }

    /**
     * Iterator-based data stream for memory-efficient data generation. Every call to
     * {@link #iterator()} starts again at row id 1 and generates fresh rows.
     */
    public static class DataStream implements Iterable<BenchmarkData> {
        private final int totalRows;
        private final DataGenerator generator;

        public DataStream(int totalRows, DataGenerator generator) {
            this.totalRows = totalRows;
            this.generator = generator;
        }

        @Override
        public java.util.Iterator<BenchmarkData> iterator() {
            return new java.util.Iterator<BenchmarkData>() {
                private int currentRow = 0;

                @Override
                public boolean hasNext() {
                    return currentRow < totalRows;
                }

                @Override
                public BenchmarkData next() {
                    if (!hasNext()) {
                        throw new java.util.NoSuchElementException();
                    }
                    return generator.generateSingleRow(++currentRow);
                }
            };
        }

        public int getTotalRows() {
            return totalRows;
        }
    }

    /**
     * Generate data with specific characteristics for performance testing. Each row starts as a
     * regular random row and is then adjusted for every enabled characteristic.
     *
     * @param rowCount number of rows to generate
     * @param characteristics which adjustments to apply
     * @return a new list of {@code rowCount} rows
     */
    public List<BenchmarkData> generateDataWithCharacteristics(int rowCount, DataCharacteristics characteristics) {
        logger.info("Generating {} rows with specific characteristics: {}", rowCount, characteristics);

        Random random = rng();
        List<BenchmarkData> data = new ArrayList<>(rowCount);

        for (int i = 0; i < rowCount; i++) {
            BenchmarkData row = generateSingleRow(i + 1);

            // Apply characteristics
            if (characteristics.isLargeStrings()) {
                row.setStringData(generateRandomString(100, 500));
                row.setDescription(generateLargeDescription());
            }

            if (characteristics.isRepeatedValues()) {
                // Use limited set of values to create repetition
                row.setCategory(CATEGORIES[i % 3]);
                row.setStatus(STATUSES[i % 2]);
            }

            if (characteristics.isNullValues()) {
                // Randomly nullify some fields
                if (random.nextFloat() < 0.1) { // 10% chance
                    row.setExtraData1(null);
                    row.setExtraData2(null);
                }
            }

            data.add(row);
        }

        return data;
    }

    /** Builds 5-14 ". "-separated sentences of 5-19 sample words each. */
    private String generateLargeDescription() {
        Random random = rng();
        int sentenceCount = random.nextInt(10) + 5; // 5-14 sentences
        StringBuilder sb = new StringBuilder();

        for (int i = 0; i < sentenceCount; i++) {
            if (i > 0) {
                sb.append(". ");
            }

            int wordsInSentence = random.nextInt(15) + 5; // 5-19 words per sentence
            for (int j = 0; j < wordsInSentence; j++) {
                if (j > 0) {
                    sb.append(" ");
                }
                sb.append(SAMPLE_WORDS[random.nextInt(SAMPLE_WORDS.length)]);
            }
        }

        return sb.toString();
    }

    /**
     * Configuration class for data characteristics. All flags start disabled; the
     * {@code with...} methods enable one flag on this instance and return it for chaining.
     */
    public static class DataCharacteristics {
        private boolean largeStrings = false;
        private boolean repeatedValues = false;
        private boolean nullValues = false;

        public static DataCharacteristics defaults() {
            return new DataCharacteristics();
        }

        public DataCharacteristics withLargeStrings() {
            this.largeStrings = true;
            return this;
        }

        public DataCharacteristics withRepeatedValues() {
            this.repeatedValues = true;
            return this;
        }

        public DataCharacteristics withNullValues() {
            this.nullValues = true;
            return this;
        }

        public boolean isLargeStrings() {
            return largeStrings;
        }

        public boolean isRepeatedValues() {
            return repeatedValues;
        }

        public boolean isNullValues() {
            return nullValues;
        }

        @Override
        public String toString() {
            return "DataCharacteristics{" + "largeStrings="
                    + largeStrings + ", repeatedValues="
                    + repeatedValues + ", nullValues="
                    + nullValues + '}';
        }
    }

    // Static convenience methods for backward compatibility
    private static final DataGenerator defaultGenerator = new DataGenerator();

    /**
     * Generate test data list using default generator.
     */
    public static List<BenchmarkData> generateTestDataList(int rowCount) {
        return defaultGenerator.generateData(rowCount);
    }

    /**
     * Generate test data with specific size using default generator.
     */
    public static List<BenchmarkData> generateTestDataList(BenchmarkConfiguration.DatasetSize size) {
        return defaultGenerator.generateData(size);
    }

    /**
     * Generate test data with specific size (alias method).
     */
    public static List<BenchmarkData> generateTestData(BenchmarkConfiguration.DatasetSize size) {
        return defaultGenerator.generateData(size);
    }

    /**
     * Alias for BenchmarkData to maintain compatibility.
     */
    public static class TestData extends BenchmarkData {
        // This class exists for backward compatibility
    }
}
diff --git a/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/MemoryProfiler.java b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/MemoryProfiler.java new file mode 100644 index 000000000..5beff3009 --- /dev/null +++ b/fesod-benchmark/src/main/java/org/apache/fesod/sheet/benchmark/utils/MemoryProfiler.java @@ -0,0 +1,402 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fesod.sheet.benchmark.utils; + +import java.lang.management.GarbageCollectorMXBean; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.lang.management.MemoryUsage; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class for profiling memory usage during benchmark execution + */ +public class MemoryProfiler { + + private static final Logger logger = LoggerFactory.getLogger(MemoryProfiler.class); + + private final MemoryMXBean memoryBean; + private final List gcBeans; + private volatile ScheduledExecutorService scheduler; + private final AtomicBoolean running; + private final Object schedulerLock = new Object(); + + // Memory tracking variables + private final AtomicLong maxUsedMemory; + private final AtomicLong totalMemorySamples; + private final AtomicLong sumMemoryUsage; + private final List memorySnapshots; + + // GC tracking variables + private long initialGcCount; + private long initialGcTime; + private long startTime; + + public MemoryProfiler() { + this.memoryBean = ManagementFactory.getMemoryMXBean(); + this.gcBeans = ManagementFactory.getGarbageCollectorMXBeans(); + this.scheduler = createScheduler(); + this.running = new AtomicBoolean(false); + this.maxUsedMemory = new AtomicLong(0); + this.totalMemorySamples = new AtomicLong(0); + this.sumMemoryUsage = new AtomicLong(0); + this.memorySnapshots = new ArrayList<>(); + } + + /** + * Start memory profiling + */ + public void start() { + if (running.compareAndSet(false, true)) { + reset(); + startTime = 
System.currentTimeMillis(); + + // Record initial GC stats + initialGcCount = getTotalGcCount(); + initialGcTime = getTotalGcTime(); + + // Create a new scheduler if needed + synchronized (schedulerLock) { + if (scheduler.isShutdown() || scheduler.isTerminated()) { + scheduler = createScheduler(); + } + } + + try { + // Start memory sampling + scheduler.scheduleAtFixedRate( + this::sampleMemory, + 0, + org.apache.fesod.sheet.benchmark.core.BenchmarkConfiguration.MEMORY_SAMPLING_INTERVAL_MS, + TimeUnit.MILLISECONDS); + + logger.debug("Memory profiling started"); + } catch (Exception e) { + logger.warn("Failed to start memory sampling: {}", e.getMessage()); + running.set(false); + } + } + } + + /** + * Stop memory profiling. Only acts when the running flag flips from true to false: shuts the scheduler down, waits up to 1 second for termination and otherwise calls shutdownNow(); if interrupted while waiting it also calls shutdownNow() and re-sets the current thread's interrupt flag. + */ + public void stop() { + if (running.compareAndSet(true, false)) { + synchronized (schedulerLock) { + if (scheduler != null && !scheduler.isShutdown()) { + scheduler.shutdown(); + try { + if (!scheduler.awaitTermination(1, TimeUnit.SECONDS)) { + scheduler.shutdownNow(); + } + } catch (InterruptedException e) { + scheduler.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + } + logger.debug("Memory profiling stopped"); + } + } + + /** + * Reset all memory tracking variables: sets the max, sample-count and sum counters to 0 and clears the stored samples. The GC baselines (initialGcCount, initialGcTime) are not modified here. + */ + public void reset() { + maxUsedMemory.set(0); + totalMemorySamples.set(0); + sumMemoryUsage.set(0); + synchronized (memorySnapshots) { + memorySnapshots.clear(); + } + } + + /** + * Sample current memory usage: reads the used heap size, raises the recorded maximum if needed, adds the value to the count and sum used for averaging, and appends it to memorySnapshots. Any exception is logged at warn level and not rethrown. + */ + private void sampleMemory() { + try { + MemoryUsage heapUsage = memoryBean.getHeapMemoryUsage(); + long currentUsed = heapUsage.getUsed(); + + // Update max memory usage + maxUsedMemory.updateAndGet(current -> Math.max(current, currentUsed)); + + // Update average calculation + totalMemorySamples.incrementAndGet(); + sumMemoryUsage.addAndGet(currentUsed); + + // Store snapshot for detailed analysis + synchronized (memorySnapshots) { + memorySnapshots.add(currentUsed); + } + + } catch (Exception e) { + logger.warn("Error sampling memory usage",
e); + } + } + + /** + * Get current memory snapshot. Combines the sampled maximum and the integer average (0 when there are no samples) with the currently committed heap size, the GC count and GC time accumulated since initialGcCount and initialGcTime were recorded, and the milliseconds elapsed since startTime. + */ + public MemorySnapshot getSnapshot() { + MemoryUsage heapUsage = memoryBean.getHeapMemoryUsage(); + + long maxUsed = maxUsedMemory.get(); + long samples = totalMemorySamples.get(); + long avgUsed = samples > 0 ? sumMemoryUsage.get() / samples : 0; + + long currentGcCount = getTotalGcCount(); + long currentGcTime = getTotalGcTime(); + + return new MemorySnapshot( + maxUsed, + avgUsed, + heapUsage.getCommitted(), + currentGcCount - initialGcCount, + currentGcTime - initialGcTime, + System.currentTimeMillis() - startTime); + } + + /** + * Shutdown the profiler: calls stop(), then shutdownNow() on the scheduler when it is non-null. + */ + public void shutdown() { + stop(); + synchronized (schedulerLock) { + if (scheduler != null) { + scheduler.shutdownNow(); + } + } + } + + /** + * Create a new scheduler: a single-thread scheduled executor whose thread is a daemon named MemoryProfiler. + */ + private ScheduledExecutorService createScheduler() { + return Executors.newSingleThreadScheduledExecutor(r -> { + Thread t = new Thread(r, "MemoryProfiler"); + t.setDaemon(true); + return t; + }); + } + + /** + * Get total GC time across all collectors. A bean's value is added only when it is greater than 0; anything else counts as 0. + */ + private long getTotalGcTime() { + return gcBeans.stream() + .mapToLong(bean -> bean.getCollectionTime() > 0 ? bean.getCollectionTime() : 0) + .sum(); + } + + /** + * Get total GC count across all collectors. A bean's value is added only when it is greater than 0; anything else counts as 0. + */ + private long getTotalGcCount() { + return gcBeans.stream() + .mapToLong(bean -> bean.getCollectionCount() > 0 ?
bean.getCollectionCount() : 0) + .sum(); + } + + /** + * Get current memory usage: the used heap size read from memoryBean at call time (not a sampled value). + */ + public long getUsedMemory() { + return memoryBean.getHeapMemoryUsage().getUsed(); + } + + /** + * Get peak memory usage: the current value of maxUsedMemory, i.e. the largest sample recorded since that counter was last set to 0. + */ + public long getPeakMemoryUsage() { + return maxUsedMemory.get(); + } + + /** + * Get detailed memory statistics, computed on a copy of memorySnapshots taken under its lock. Returns all-zero statistics when there are no samples; otherwise min, max, mean and standard deviation (n divisor; both truncated to long), plus the 95th percentile taken as the sorted element at index ceil(0.95 * n) - 1. + */ + public MemoryStatistics getDetailedStatistics() { + List snapshots; + synchronized (memorySnapshots) { + snapshots = new ArrayList<>(memorySnapshots); + } + + if (snapshots.isEmpty()) { + return new MemoryStatistics(0, 0, 0, 0, 0); + } + + long min = snapshots.stream().mapToLong(Long::longValue).min().orElse(0); + long max = snapshots.stream().mapToLong(Long::longValue).max().orElse(0); + double avg = snapshots.stream().mapToLong(Long::longValue).average().orElse(0); + + // Calculate standard deviation + double variance = snapshots.stream() + .mapToDouble(value -> Math.pow(value - avg, 2)) + .average() + .orElse(0); + double stdDev = Math.sqrt(variance); + + // Calculate 95th percentile + snapshots.sort(Long::compareTo); + int p95Index = (int) Math.ceil(0.95 * snapshots.size()) - 1; + long p95 = snapshots.get(Math.max(0, p95Index)); + + return new MemoryStatistics(min, max, (long) avg, (long) stdDev, p95); + } + + /** + * Memory snapshot data class: immutable holder for the six values passed to its constructor (max used, average used, allocated memory, GC count, GC time, duration in ms), with MB accessors that divide by 1024.0 * 1024.0. + */ + public static class MemorySnapshot { + private final long maxUsedMemory; + private final long avgUsedMemory; + private final long allocatedMemory; + private final long gcCount; + private final long gcTime; + private final long durationMs; + + public MemorySnapshot( + long maxUsedMemory, + long avgUsedMemory, + long allocatedMemory, + long gcCount, + long gcTime, + long durationMs) { + this.maxUsedMemory = maxUsedMemory; + this.avgUsedMemory = avgUsedMemory; + this.allocatedMemory = allocatedMemory; + this.gcCount = gcCount; + this.gcTime = gcTime; + this.durationMs = durationMs; + } + + public long getMaxUsedMemory() { + return maxUsedMemory; + } + + public long getAvgUsedMemory() { +
return avgUsedMemory; + } + + public long getAllocatedMemory() { + return allocatedMemory; + } + + public long getGcCount() { + return gcCount; + } + + public long getGcTime() { + return gcTime; + } + + public long getDurationMs() { + return durationMs; + } + + public double getMaxUsedMemoryMB() { + return maxUsedMemory / (1024.0 * 1024.0); + } + + public double getAvgUsedMemoryMB() { + return avgUsedMemory / (1024.0 * 1024.0); + } + + public double getAllocatedMemoryMB() { + return allocatedMemory / (1024.0 * 1024.0); + } + + @Override + public String toString() { + return String.format( + "MemorySnapshot{maxUsed=%.2f MB, avgUsed=%.2f MB, allocated=%.2f MB, gcCount=%d, gcTime=%d ms, duration=%d ms}", + getMaxUsedMemoryMB(), getAvgUsedMemoryMB(), getAllocatedMemoryMB(), gcCount, gcTime, durationMs); + } + } + + /** + * Detailed memory statistics data class: immutable holder for the min, max, average, standard deviation and 95th-percentile values passed to its constructor, with MB accessors that divide by 1024.0 * 1024.0. + */ + public static class MemoryStatistics { + private final long minMemory; + private final long maxMemory; + private final long avgMemory; + private final long stdDevMemory; + private final long p95Memory; + + public MemoryStatistics(long minMemory, long maxMemory, long avgMemory, long stdDevMemory, long p95Memory) { + this.minMemory = minMemory; + this.maxMemory = maxMemory; + this.avgMemory = avgMemory; + this.stdDevMemory = stdDevMemory; + this.p95Memory = p95Memory; + } + + public long getMinMemory() { + return minMemory; + } + + public long getMaxMemory() { + return maxMemory; + } + + public long getAvgMemory() { + return avgMemory; + } + + public long getStdDevMemory() { + return stdDevMemory; + } + + public long getP95Memory() { + return p95Memory; + } + + public double getMinMemoryMB() { + return minMemory / (1024.0 * 1024.0); + } + + public double getMaxMemoryMB() { + return maxMemory / (1024.0 * 1024.0); + } + + public double getAvgMemoryMB() { + return avgMemory / (1024.0 * 1024.0); + } + + public double getStdDevMemoryMB() { + return stdDevMemory / (1024.0 * 1024.0); + } + + public double
getP95MemoryMB() { + return p95Memory / (1024.0 * 1024.0); + } + } +} diff --git a/pom.xml b/pom.xml index b1b793fe6..86d2f8453 100644 --- a/pom.xml +++ b/pom.xml @@ -52,6 +52,7 @@ fesod-shaded fesod-examples fesod-sheet + fesod-benchmark