From d65bd7dade640681ba57688f3e089a28fb02d444 Mon Sep 17 00:00:00 2001 From: Cameron Lee Date: Tue, 14 Jan 2020 14:22:49 -0800 Subject: [PATCH 1/5] SAMZA-2433: Use log4j2 in samza-hello-samza --- build.gradle | 23 +++-- gradle.properties | 2 +- pom.xml | 136 +++++++++++++++++++++++------ src/main/assembly/src.xml | 2 +- src/main/resources/log4j.xml | 52 ----------- src/main/resources/log4j2.xml | 52 +++++++++++ src/test/resources/log4j2-test.xml | 37 ++++++++ 7 files changed, 213 insertions(+), 91 deletions(-) delete mode 100644 src/main/resources/log4j.xml create mode 100644 src/main/resources/log4j2.xml create mode 100644 src/test/resources/log4j2-test.xml diff --git a/build.gradle b/build.gradle index 9c807c72..427409c4 100644 --- a/build.gradle +++ b/build.gradle @@ -49,25 +49,32 @@ configurations { explode } +configurations.all { + // using log4j1 to log4j2 bridge so need to exclude log4j1 + exclude group: 'log4j', module: 'log4j' + // exclude all other slf4j bindings that are transitively pulled in + exclude group: 'org.slf4j', module: 'slf4j-log4j12' +} + dependencies { compile(group: 'org.codehaus.jackson', name: 'jackson-jaxrs', version: '1.9.13') compile(group: 'org.slf4j', name: 'slf4j-api', version: "$SLF4J_VERSION") - compile(group: 'org.slf4j', name: 'slf4j-log4j12', version: "$SLF4J_VERSION") compile(group: 'org.schwering', name: 'irclib', version: '1.10') compile(group: 'org.apache.samza', name: 'samza-api', version: "$SAMZA_VERSION") - compile(group: 'org.apache.samza', name: 'samza-kv_2.11', version: "$SAMZA_VERSION") - compile(group: 'org.apache.samza', name: 'samza-test_2.11', version: "$SAMZA_VERSION") + compile(group: 'org.apache.samza', name: 'samza-azure_2.11', version: "$SAMZA_VERSION") compile(group: 'org.apache.samza', name: 'samza-kafka_2.11', version: "$SAMZA_VERSION") + compile(group: 'org.apache.samza', name: 'samza-kv_2.11', version: "$SAMZA_VERSION") + compile(group: 'org.apache.samza', name: 'samza-kv-couchbase_2.11', version: 
"$SAMZA_VERSION") compile(group: 'org.apache.samza', name: 'samza-kv-rocksdb_2.11', version: "$SAMZA_VERSION") - compile(group: 'org.apache.samza', name: 'samza-azure_2.11', version: "$SAMZA_VERSION") - testCompile(group: 'junit', name: 'junit', version: "4.12") - explode (group: 'org.apache.samza', name: 'samza-shell', ext: 'tgz', classifier: 'dist', version: "$SAMZA_VERSION") + explode(group: 'org.apache.samza', name: 'samza-shell', ext: 'tgz', classifier: 'dist', version: "$SAMZA_VERSION") runtime(group: 'org.apache.samza', name: 'samza-core_2.11', version: "$SAMZA_VERSION") - runtime(group: 'org.apache.samza', name: 'samza-log4j_2.11', version: "$SAMZA_VERSION") + runtime(group: 'org.apache.samza', name: 'samza-log4j2_2.11', version: "$SAMZA_VERSION") runtime(group: 'org.apache.samza', name: 'samza-shell', version: "$SAMZA_VERSION") runtime(group: 'org.apache.samza', name: 'samza-yarn_2.11', version: "$SAMZA_VERSION") runtime(group: 'org.apache.kafka', name: 'kafka_2.11', version: "$KAFKA_VERSION") runtime(group: 'org.apache.hadoop', name: 'hadoop-hdfs', version: "$HADOOP_VERSION") + testCompile(group: 'org.apache.samza', name: 'samza-test_2.11', version: "$SAMZA_VERSION") + testCompile(group: 'junit', name: 'junit', version: "4.12") } // make the samza distribution .tgz file @@ -100,7 +107,7 @@ task distTar(dependsOn: build, type: Tar) { from configurations.runtime from configurations.runtime.artifacts.files from("src/main/resources/") { - include "log4j.xml" + include "log4j2.xml" } } } diff --git a/gradle.properties b/gradle.properties index 1063c21a..2ca7a85c 100644 --- a/gradle.properties +++ b/gradle.properties @@ -17,7 +17,7 @@ * under the License. */ -SAMZA_VERSION=1.1.1-SNAPSHOT +SAMZA_VERSION=1.4.0-SNAPSHOT KAFKA_VERSION=0.11.0.2 HADOOP_VERSION=2.6.1 diff --git a/pom.xml b/pom.xml index 38cabd1d..49f4eff1 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ under the License. 
org.apache.samza hello-samza - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT jar Samza Example @@ -61,6 +61,17 @@ under the License. org.apache.samza samza-core_2.11 ${samza.version} + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + org.apache.samza @@ -95,14 +106,8 @@ under the License. org.apache.samza - samza-log4j_2.11 + samza-log4j2_2.11 ${samza.version} - - - org.apache.samza - samza-core_2.12 - - org.apache.samza @@ -115,6 +120,17 @@ under the License. org.apache.samza samza-yarn_2.11 ${samza.version} + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + org.apache.samza @@ -131,16 +147,6 @@ under the License. samza-kafka_2.11 ${samza.version} - - org.apache.samza - samza-aws_2.11 - ${samza.version} - - - org.apache.kafka - kafka_2.11 - 0.11.0.2 - org.schwering irclib @@ -151,55 +157,127 @@ under the License. slf4j-api 1.6.2 - - org.slf4j - slf4j-log4j12 - 1.6.2 - org.codehaus.jackson jackson-jaxrs 1.9.13 + + org.apache.httpcomponents + httpcore + 4.4.1 + org.apache.hadoop hadoop-annotations ${hadoop.version} + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + org.apache.hadoop hadoop-auth ${hadoop.version} + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + org.apache.hadoop hadoop-common ${hadoop.version} - - - org.apache.httpcomponents - httpcore - 4.4.1 + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + org.apache.hadoop hadoop-hdfs ${hadoop.version} + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + org.apache.hadoop hadoop-yarn-api ${hadoop.version} + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + org.apache.hadoop hadoop-yarn-client ${hadoop.version} + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + org.apache.hadoop hadoop-yarn-common ${hadoop.version} + + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + com.google.guava @@ -211,7 +289,7 @@ under the License. 
UTF-8 - 1.3.0-SNAPSHOT + 1.4.0-SNAPSHOT 2.6.1 diff --git a/src/main/assembly/src.xml b/src/main/assembly/src.xml index 1614aafe..5fac44a4 100644 --- a/src/main/assembly/src.xml +++ b/src/main/assembly/src.xml @@ -41,7 +41,7 @@ - ${basedir}/src/main/resources/log4j.xml + ${basedir}/src/main/resources/log4j2.xml lib diff --git a/src/main/resources/log4j.xml b/src/main/resources/log4j.xml deleted file mode 100644 index 805d5caa..00000000 --- a/src/main/resources/log4j.xml +++ /dev/null @@ -1,52 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/main/resources/log4j2.xml b/src/main/resources/log4j2.xml new file mode 100644 index 00000000..adcdadb1 --- /dev/null +++ b/src/main/resources/log4j2.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/test/resources/log4j2-test.xml b/src/test/resources/log4j2-test.xml new file mode 100644 index 00000000..c095c001 --- /dev/null +++ b/src/test/resources/log4j2-test.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + From ab222cf3b3285569fe2cf2d2e7e8741052195789 Mon Sep 17 00:00:00 2001 From: Cameron Lee Date: Wed, 29 Jan 2020 18:33:20 -0800 Subject: [PATCH 2/5] wip --- build.gradle | 124 +++++++++++++++++- gradle.properties | 4 +- ...edia-application-with-framework.properties | 41 ++++++ 3 files changed, 164 insertions(+), 5 deletions(-) create mode 100644 src/main/config/wikipedia-application-with-framework.properties diff --git a/build.gradle b/build.gradle index 427409c4..b4b3e659 100644 --- a/build.gradle +++ b/build.gradle @@ -36,7 +36,6 @@ repositories { maven { url "https://repository.apache.org/content/groups/public" } } - idea { module { sourceDirs += file('src/main/java') @@ -44,9 +43,29 @@ idea { } } -// a configuration for dependencies that need exploding into package configurations { + // a configuration for dependencies that need exploding into package explode + + // configuration for runtime for Samza framework API package 
+ frameworkApiRuntime { + // allowing logging to be chosen by application, so exclude log4j from here + exclude group: 'log4j', module: 'log4j' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + } + // configuration for generating class names for Samza framework API + frameworkApiClasses { + // only want to generate class names for the classes directly in the dependencies, not for transitive dependencies + transitive = false + } + // configuration for runtime for Samza framework infrastructure package + frameworkInfrastructureRuntime { + // allowing logging to be chosen by application, so exclude log4j from here + exclude group: 'log4j', module: 'log4j' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + } + // configuration for extracting scripts for running Samza in the framework infrastructure package + frameworkInfrastructureExplode } configurations.all { @@ -58,7 +77,8 @@ configurations.all { dependencies { compile(group: 'org.codehaus.jackson', name: 'jackson-jaxrs', version: '1.9.13') - compile(group: 'org.slf4j', name: 'slf4j-api', version: "$SLF4J_VERSION") + // slf4j-api 1.8.0-alpha2 uses ServiceProvider which requires additional set-up for split deployment, so pinning down the version + compile(group: 'org.slf4j', name: 'slf4j-api', version: "$SLF4J_VERSION") { force = true } compile(group: 'org.schwering', name: 'irclib', version: '1.10') compile(group: 'org.apache.samza', name: 'samza-api', version: "$SAMZA_VERSION") compile(group: 'org.apache.samza', name: 'samza-azure_2.11', version: "$SAMZA_VERSION") @@ -73,8 +93,26 @@ dependencies { runtime(group: 'org.apache.samza', name: 'samza-yarn_2.11', version: "$SAMZA_VERSION") runtime(group: 'org.apache.kafka', name: 'kafka_2.11', version: "$KAFKA_VERSION") runtime(group: 'org.apache.hadoop', name: 'hadoop-hdfs', version: "$HADOOP_VERSION") + testCompile(group: 'org.apache.samza', name: 'samza-test_2.11', version: "$SAMZA_VERSION") testCompile(group: 'junit', name: 'junit', version: "4.12") + + // 
dependencies for framework API package + frameworkApiRuntime(group: 'org.apache.samza', name: 'samza-api', version: "$SAMZA_VERSION") + frameworkApiRuntime(group: 'org.apache.samza', name: 'samza-kv_2.11', version: "$SAMZA_VERSION") + frameworkApiRuntime(group: 'org.apache.samza', name: 'samza-log4j2_2.11', version: "$SAMZA_VERSION") // pulls in log4j2 dependencies + frameworkApiRuntime(group: 'org.slf4j', name: 'slf4j-api', version: "$SLF4J_VERSION") { force = true } + // need to specify the classes in these modules as framework API classes + frameworkApiClasses(group: 'org.apache.samza', name: 'samza-api', version: "$SAMZA_VERSION") + frameworkApiClasses(group: 'org.apache.samza', name: 'samza-kv_2.11', version: "$SAMZA_VERSION") + + // dependencies for framework infrastructure package + frameworkInfrastructureRuntime(group: 'org.apache.samza', name: 'samza-core_2.11', version: "$SAMZA_VERSION") + frameworkInfrastructureRuntime(group: 'org.apache.samza', name: 'samza-kafka_2.11', version: "$SAMZA_VERSION") + frameworkInfrastructureRuntime(group: 'org.apache.samza', name: 'samza-log4j2_2.11', version: "$SAMZA_VERSION") + frameworkInfrastructureRuntime(group: 'org.apache.samza', name: 'samza-yarn_2.11', version: "$SAMZA_VERSION") + frameworkInfrastructureRuntime(group: 'org.slf4j', name: 'slf4j-api', version: "$SLF4J_VERSION") { force = true } + frameworkInfrastructureExplode(group: 'org.apache.samza', name: 'samza-shell', ext: 'tgz', classifier: 'dist', version: "$SAMZA_VERSION") } // make the samza distribution .tgz file @@ -89,6 +127,7 @@ task distTar(dependsOn: build, type: Tar) { include "wikipedia-parser.properties" include "wikipedia-stats.properties" include "wikipedia-application.properties" + include "wikipedia-application-with-framework.properties" // expand the Maven tokens with Gradle equivalents. 
Also change 'target' (Maven) to 'build/distributions' (Gradle) filter { String line -> @@ -112,6 +151,85 @@ task distTar(dependsOn: build, type: Tar) { } } +// +// Job coordinator isolation tasks +// + +def SAMZA_FRAMEWORK_TMP_DIR_NAME = "samzaFrameworkTmp" + +task classListGeneration(type: ClassListGenerationTask) { + classListOutputDirectory(new File(project.buildDir, SAMZA_FRAMEWORK_TMP_DIR_NAME)) + customClassListEntries = [ + // for log4j2 classes (both api and core classes) + "org.apache.logging.log4j.*" + ] +} + +// build the framework API .tgz file +task frameworkApiDistTar(dependsOn: classListGeneration, type: Tar) { + baseName("samza-hello-samza-frameworkApi") + destinationDir(new File(project.buildDir, "/distributions")) + compression(Compression.GZIP) + classifier('dist') + extension('tar.gz') + into("lib") { + from configurations.frameworkApiRuntime + from new File(new File(project.buildDir, SAMZA_FRAMEWORK_TMP_DIR_NAME), ClassListGenerationTask.SAMZA_FRAMEWORK_API_CLASSES_FILE_NAME) + } +} + +/** + * Generates the framework API class list file by looking at the structure of the JARs in the frameworkApiClasses configuration. 
+ */ +class ClassListGenerationTask extends DefaultTask { + static String SAMZA_FRAMEWORK_API_CLASSES_FILE_NAME = "samza-framework-api-classes.txt" + File classListOutputDirectory + List customClassListEntries + + @TaskAction + def generateClassList() { + def classExtension = ".class" + def classNames = [] as Set + classNames.addAll(customClassListEntries ?: []) + project.configurations.frameworkApiClasses.files + .findAll { it.name.endsWith(".jar") } + .each { project.zipTree(it) + .matching { pattern -> pattern.include("**/*" + classExtension) } + .visit(new FileVisitor() { + public void visitDir(FileVisitDetails fileVisitDetails) { + // we only care about class files, so no need to do anything for directories + } + public void visitFile(FileVisitDetails fileVisitDetails) { + def path = fileVisitDetails.getPath() + classNames.add(path.substring(0, path.length() - classExtension.length()).replace("/", ".")) + } + }) + } + classListOutputDirectory.mkdirs() + def classListOutputFile = new File(classListOutputDirectory, SAMZA_FRAMEWORK_API_CLASSES_FILE_NAME) + classListOutputFile.text = classNames.sort().join("\n") + } +} + + +// build the framework infrastructure .tgz file +task frameworkInfrastructureDistTar(type: Tar) { + baseName("samza-hello-samza-frameworkInfrastructure") + destinationDir(new File(project.buildDir, "/distributions")) + compression(Compression.GZIP) + classifier('dist') + extension('tar.gz') + into("bin") { + from { + configurations.frameworkInfrastructureExplode.collect { tarTree(it) } + } + } + into("lib") { + from configurations.frameworkInfrastructureRuntime + } +} + + +// install everything task installGrid(type: Exec) { workingDir(project.projectDir) diff --git a/gradle.properties b/gradle.properties index c241bb45..f20de40b 100644 --- a/gradle.properties +++ b/gradle.properties @@ -21,5 +21,5 @@ SAMZA_VERSION=1.4.0-SNAPSHOT KAFKA_VERSION=0.11.0.2 HADOOP_VERSION=2.7.1 -SLF4J_VERSION = 1.7.7 - +# slf4j-api 1.8.0-alpha2 uses
ServiceProvider which requires additional set-up for split deployment, so pinning down the version +SLF4J_VERSION=1.7.7 diff --git a/src/main/config/wikipedia-application-with-framework.properties b/src/main/config/wikipedia-application-with-framework.properties new file mode 100644 index 00000000..919346c7 --- /dev/null +++ b/src/main/config/wikipedia-application-with-framework.properties @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Application / Job +app.class=samza.examples.wikipedia.application.WikipediaApplication +job.factory.class=org.apache.samza.job.yarn.YarnJobFactory +job.name=wikipedia-application + +# YARN +yarn.package.path=file://${basedir}/target/${project.artifactId}-${pom.version}-dist.tar.gz + +# Serializers +serializers.registry.string.class=org.apache.samza.serializers.StringSerdeFactory +serializers.registry.integer.class=org.apache.samza.serializers.IntegerSerdeFactory + +# Key-value storage +stores.wikipedia-stats.factory=org.apache.samza.storage.kv.RocksDbKeyValueStorageEngineFactory +stores.wikipedia-stats.changelog=kafka.wikipedia-stats-changelog +stores.wikipedia-stats.key.serde=string +stores.wikipedia-stats.msg.serde=integer + +# enabling usage of Samza framework for job coordinator isolation +samza.cluster.based.job.coordinator.dependency.isolation.enabled=true +yarn.resources.__samzaFrameworkApi.path=file://${basedir}/target/samza-hello-samza-frameworkApi-${pom.version}-dist.tar.gz +yarn.resources.__samzaFrameworkApi.local.type=ARCHIVE +yarn.resources.__samzaFrameworkInfrastructure.path=file://${basedir}/target/samza-hello-samza-frameworkInfrastructure-${pom.version}-dist.tar.gz +yarn.resources.__samzaFrameworkInfrastructure.local.type=ARCHIVE From d6916e38b32d0685c682f926163b6298bbd4ce9d Mon Sep 17 00:00:00 2001 From: Cameron Lee Date: Thu, 30 Jan 2020 11:06:59 -0800 Subject: [PATCH 3/5] missed a merge section --- build.gradle | 3 --- 1 file changed, 3 deletions(-) diff --git a/build.gradle b/build.gradle index 6d391e28..34378fb8 100644 --- a/build.gradle +++ b/build.gradle @@ -154,7 +154,6 @@ task distTar(dependsOn: build, type: Tar) { from configurations.runtime.artifacts.files from("src/main/resources/") { include "log4j2.xml" -<<<<<<< HEAD } } } @@ -230,8 +229,6 @@ task frameworkInfrastructureDistTar(type: Tar) { into("bin") { from { configurations.frameworkInfrastructureExplode.collect { tarTree(it) } -======= ->>>>>>> latest } } into("lib") { From 
c67dff8ad67bc89dd5caf8435392754b1fc8500a Mon Sep 17 00:00:00 2001 From: Cameron Lee Date: Thu, 30 Jan 2020 11:14:22 -0800 Subject: [PATCH 4/5] clarifying some dependencies --- build.gradle | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/build.gradle b/build.gradle index 34378fb8..c89ea363 100644 --- a/build.gradle +++ b/build.gradle @@ -49,9 +49,9 @@ configurations { // configuration for runtime for Samza framework API package frameworkApiRuntime { - // allowing logging to be chosen by application, so exclude log4j from here - exclude group: 'log4j', module: 'log4j' - exclude group: 'org.slf4j', module: 'slf4j-log4j12' + // using log4j2, so exclude log4j dependencies + exclude group: 'log4j', module: 'log4j' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' } // configuration for generating class names for Samza framework API frameworkApiClasses { @@ -60,26 +60,19 @@ configurations { } // configuration for runtime for Samza framework infrastructure package frameworkInfrastructureRuntime { - // allowing logging to be chosen by application, so exclude log4j from here - exclude group: 'log4j', module: 'log4j' - exclude group: 'org.slf4j', module: 'slf4j-log4j12' + // using log4j2, so exclude log4j dependencies + exclude group: 'log4j', module: 'log4j' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' } // configuration for extracting scripts for running Samza in the framework infrastructure package frameworkInfrastructureExplode } -configurations.all { - // using log4j1 to log4j2 bridge so need to exclude log4j1 - exclude group: 'log4j', module: 'log4j' - // exclude all other slf4j bindings that are transitively pulled in - exclude group: 'org.slf4j', module: 'slf4j-log4j12' -} - -configurations.all { +configurations { // using log4j1 to log4j2 bridge so need to exclude log4j1 - exclude group: 'log4j', module: 'log4j' + runtime.exclude group: 'log4j', module: 'log4j' // exclude all other slf4j bindings that are 
transitively pulled in - exclude group: 'org.slf4j', module: 'slf4j-log4j12' + runtime.exclude group: 'org.slf4j', module: 'slf4j-log4j12' } dependencies { From 84bb9f1b9a59877db5ca01c64c372f54359fb032 Mon Sep 17 00:00:00 2001 From: Cameron Lee Date: Thu, 30 Jan 2020 11:33:39 -0800 Subject: [PATCH 5/5] adding comment about framework --- build.gradle | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index c89ea363..6fc78ba6 100644 --- a/build.gradle +++ b/build.gradle @@ -43,6 +43,9 @@ idea { } } +// NOTE: All of the "framework" pieces are for job coordinator dependency isolation. If you are +// just doing regular packaging for a Samza job, then those pieces can be ignored. + configurations { // a configuration for dependencies that need exploding into package explode @@ -211,7 +214,6 @@ class ClassListGenerationTask extends DefaultTask { } } - // build the framework infrastructure .tgz file task frameworkInfrastructureDistTar(type: Tar) { baseName("samza-hello-samza-frameworkInfrastructure")