Poor performance when upgrading from 2.12.2 to 2.13.3
See original GitHub issue. Describe the bug: When serializing an object to JSON, the serialization time increases with the number of jars on the classpath. I found this issue while upgrading jackson-databind and jackson-module-scala_2.12 from 2.12.2 to 2.13.3.
Version information 2.13.3
To Reproduce: the problem can be reproduced with the following files:
JacksonTestServer.java:
import io.prometheus.client.exporter.HTTPServer;
import org.glassfish.grizzly.http.server.HttpServer;
import org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpServerFactory;
import org.glassfish.jersey.server.ResourceConfig;
import javax.ws.rs.core.UriBuilder;
import java.io.IOException;
import java.net.URI;
/**
 * Entry point of the reproduction project.
 *
 * <p>Registers {@link JacksonHTTPService} with a Jersey {@code ResourceConfig}, serves it through
 * a Grizzly {@code HttpServer} bound to port 9990, and opens a Prometheus exporter
 * {@code HTTPServer} on port 9991.
 */
public class JacksonTestServer {
    /**
     * Creates both servers and then parks the main thread until it is interrupted.
     *
     * @param args command-line arguments; not read
     * @throws IOException propagated from building or starting the servers
     */
    public static void main(String[] args) throws IOException {
        URI baseUri = UriBuilder
                .fromUri("http://0.0.0.0/")
                .port(9990)
                .build();
        final ResourceConfig rc = new ResourceConfig()
                .register(JacksonHTTPService.class);
        HttpServer server = GrizzlyHttpServerFactory.createHttpServer(baseUri, rc);
        // Stop the HTTP server when the JVM shuts down.
        Runtime.getRuntime().addShutdownHook(new Thread(server::shutdown, "shutdownHook"));
        // try-with-resources closes the metrics server when this block is left.
        try (HTTPServer monitorServer =
                     new io.prometheus.client.exporter.HTTPServer.Builder()
                             .withPort(9991)
                             .build()) {
            server.start();
            // A thread joining itself only returns by being interrupted, which keeps main alive.
            Thread.currentThread().join();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
}
JacksonHTTPService.java
import io.prometheus.client.Counter;
import io.prometheus.client.Histogram;
import javax.ws.rs.*;
import javax.ws.rs.core.MediaType;
import java.util.Random;
/**
 * JAX-RS resource mounted under {@code test} that exposes the two endpoints used by the
 * reproduction and records a request counter and a latency histogram for each call.
 */
@Path("test")
public class JacksonHTTPService {
    private static final Random random = new Random();

    /** Latency histogram labelled by service and method name. */
    private static final Histogram requestLatency = Histogram.build()
            .namespace("http")
            .name("requests_latency_seconds")
            .help("avc")
            .buckets(new double[]{0.001D, 0.005D, 0.008D, 0.01D, 0.015D, 0.020D, 0.025D, 0.03D, 0.04D, 0.05D, 0.06D,
                    0.07D, 0.08D, 0.09D, 0.1D, 1D, 2D, 5D, 10D})
            .labelNames("http_service", "http_method")
            .register();

    /** Request counter labelled by service and method name. */
    private static final Counter requests = Counter.build()
            .namespace("http")
            .name("requests_total")
            .help("avc")
            .labelNames("http_service", "http_method")
            .register();

    /**
     * Builds an {@code IDProbList} from 15 random ids in {@code [0, 1000)} and 15 random floats,
     * sleeps for 20 ms, and returns the list serialized by a {@code JacksonJsonSerializer}.
     *
     * @return the serialized {@code IDProbList}
     * @throws RuntimeException wrapping the {@link InterruptedException} if the sleep is interrupted
     */
    @GET
    @Path("serialize")
    @Produces(MediaType.APPLICATION_JSON)
    public String getRandomString() {
        requests.labels("test", "getRandomString").inc();
        // The timer is closed, and the observation recorded, when the try block exits.
        try (Histogram.Timer requestTimer = requestLatency.labels(
                "test", "getRandomString").startTimer()) {
            // NOTE(review): a new serializer is constructed for every request. This is kept as
            // reported because the measurements in the issue refer to exactly this code.
            JacksonJsonSerializer jacksonJsonSerializer = new JacksonJsonSerializer();
            int[] ids = random.ints(15, 0, 1000).toArray();
            float[] probs = new float[15];
            for (int j = 0; j < 15; j++) {
                probs[j] = random.nextFloat();
            }
            try {
                // Presumably stands in for request-processing work; confirm with the reporter.
                Thread.sleep(20);
            } catch (InterruptedException e) {
                // Restore the interrupt flag before surfacing the failure to the container.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
            IDProbList testIDList = new IDProbList(ids, probs);
            return jacksonJsonSerializer.serialize(testIDList);
        }
    }

    /**
     * Baseline endpoint that performs no serialization.
     *
     * @return the constant string {@code "Hello"}
     */
    @GET
    @Path("hello")
    @Produces(MediaType.TEXT_PLAIN)
    public String hello() {
        requests.labels("test", "hello").inc();
        try (Histogram.Timer requestTimer = requestLatency.labels(
                "test", "hello").startTimer()) {
            return "Hello";
        }
    }
}
IDProbList.java
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Arrays;
/**
 * Result holder that carries either a pair of id/probability arrays (success) or an error code
 * with a message (failure).
 *
 * <p>Field and getter declaration order is kept as-is on purpose, since it may influence the
 * order in which a serializer emits the properties.
 */
public class IDProbList {
    private int[] ids;
    private float[] probs;
    private boolean success;
    private Status.Code errorCode;
    private String errorMsg;

    /**
     * Creates a successful result.
     *
     * @param ids   the ids; stored by reference, not copied
     * @param probs the probabilities; stored by reference, not copied
     */
    public IDProbList(int[] ids, float[] probs) {
        success = true;
        this.ids = ids;
        this.probs = probs;
    }

    /**
     * Creates a failed result.
     *
     * @param errorCode the status code describing the failure
     * @param errorMsg  the accompanying message
     */
    public IDProbList(Status.Code errorCode, String errorMsg) {
        this.errorCode = errorCode;
        this.errorMsg = errorMsg;
        success = false;
    }

    /** @return the ids array passed at construction, or {@code null} for a failed result */
    public int[] getIds() {
        return this.ids;
    }

    /** @return the probabilities array passed at construction, or {@code null} for a failed result */
    public float[] getProbs() {
        return this.probs;
    }

    /** @return {@code true} when built through the success constructor */
    public boolean isSuccess() {
        return this.success;
    }

    /**
     * Converts the stored error code into a runtime exception with a fixed description.
     * Excluded from JSON output via {@code @JsonIgnore}.
     *
     * @return the exception derived from the error code
     */
    @JsonIgnore
    public StatusRuntimeException getgRPCException() {
        final String description = "CandidateNum should be larger than recommendNum.";
        Status status = errorCode.toStatus();
        Status described = status.withDescription(description);
        return described.asRuntimeException();
    }

    /** @return the error message, or {@code null} for a successful result */
    public String getErrorMsg() {
        return this.errorMsg;
    }

    /** @return the error code, or {@code null} for a successful result */
    public Status.Code getErrorCode() {
        return this.errorCode;
    }

    /** Renders all five fields in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("IDProbList{");
        text.append("ids=").append(Arrays.toString(ids));
        text.append(", probs=").append(Arrays.toString(probs));
        text.append(", success=").append(success);
        text.append(", errorCode=").append(errorCode);
        text.append(", errorMsg='").append(errorMsg).append('\'');
        text.append('}');
        return text.toString();
    }
}
serializers.scala
import com.fasterxml.jackson.databind.{DeserializationFeature, MapperFeature, ObjectMapper, SerializationFeature}
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
/** Contract for converting objects to a string form and back. */
trait SerializeSuported {
  /** Converts `src` into its string representation. */
  def serialize(src: Object): String
  /** Rebuilds an instance of `clazz` from the string `data`. */
  def deSerialize[T](clazz: Class[T], data: String): T
}
/**
 * [[SerializeSuported]] implementation backed by a Jackson `ObjectMapper` that has the
 * `ScalaObjectMapper` trait mixed in and `DefaultScalaModule` registered.
 *
 * Each instance builds and configures its own mapper in the constructor body.
 */
class JacksonJsonSerializer extends SerializeSuported {
  val mapper = new ObjectMapper() with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)
  // Property-name matching ignores case.
  mapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true)
  // Unknown properties in the input do not fail deserialization.
  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
  // Beans without serializable properties do not fail serialization.
  mapper.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false)
  // Output is pretty-printed.
  mapper.configure(SerializationFeature.INDENT_OUTPUT, true)

  /** Serializes `src` to a string with the configured mapper. */
  override def serialize(src: Object): String = {
    mapper.writeValueAsString(src)
  }

  /** Reads an instance of `clazz` from the string `dest`. */
  override def deSerialize[T](clazz: Class[T], dest: String): T = {
    mapper.readValue[T](dest, clazz)
  }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build descriptor of the reproduction project; packaged as a jar-with-dependencies. -->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xmlns="http://maven.apache.org/POM/4.0.0"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>jacksonBugReproduce</artifactId>
    <version>jack213</version>
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.glassfish.jersey</groupId>
                <artifactId>jersey-bom</artifactId>
                <version>${jersey.version}</version>
                <type>pom</type>
                <!-- A BOM only takes effect when imported; without this scope the entry is inert. -->
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>
    <dependencies>
        <!-- Jersey artifacts are "provided": they are put on the classpath at launch time. -->
        <dependency>
            <groupId>org.glassfish.jersey.containers</groupId>
            <artifactId>jersey-container-grizzly2-http</artifactId>
            <version>${jersey.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>io.prometheus</groupId>
            <artifactId>simpleclient_httpserver</artifactId>
            <version>0.12.0</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.module</groupId>
            <artifactId>jackson-module-scala_2.12</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>org.glassfish.jersey.inject</groupId>
            <artifactId>jersey-hk2</artifactId>
            <version>${jersey.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>io.grpc</groupId>
            <artifactId>grpc-api</artifactId>
            <version>1.48.1</version>
        </dependency>
    </dependencies>
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <!-- Jackson version under test; applies to jackson-databind and jackson-module-scala. -->
        <jackson.version>2.13.3</jackson.version>
        <jersey.version>2.36</jersey.version>
    </properties>
    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
./jersey
includes:
activation-1.1.1.jar
aopalliance-repackaged-2.6.1.jar
glassfish-copyright-maven-plugin-2.4.jar
grizzly-framework-2.4.4.jar
grizzly-http-2.4.4.jar
grizzly-http-server-2.4.4.jar
hk2-api-2.6.1.jar
hk2-locator-2.6.1.jar
hk2-utils-2.6.1.jar
jakarta.annotation-api-1.3.5.jar
jakarta.inject-2.6.1.jar
jakarta.validation-api-2.0.2.jar
jakarta.ws.rs-api-2.1.6.jar
jakarta.ws.rs-api-2.1.6-sources.jar
javax.ws.rs-api-2.0.1.jar
jersey-client-2.36.jar
jersey-common-2.36.jar
jersey-container-grizzly2-http-2.36.jar
jersey-container-servlet-2.36.jar
jersey-container-servlet-core-2.36.jar
jersey-hk2-2.36.jar
jersey-server-2.36.jar
jsr311-api-1.1.1.jar
osgi-resource-locator-1.0.1.jar
osgi-resource-locator-1.0.3.jar
./spark-3.1.2-bin-hadoop2.7
is extracted from spark-3.1.2-bin-hadoop2.7.tgz
I packaged this project as a jar and tested it with these commands:
jackson 2.12.2
java -cp jacksonBugReproduce-jack212-jar-with-dependencies.jar:./jersey/*:./spark-3.1.2-bin-hadoop2.7/jars/* JacksonTestServer
# throughput: 1863.7/s mean: 26ms
jackson 2.13.3
java -cp jacksonBugReproduce-jack213-jar-with-dependencies.jar:./jersey/*:./spark-3.1.2-bin-hadoop2.7/jars/* JacksonTestServer
# throughput: 122.5/s mean: 282ms
jackson 2.13.3 remove spark
java -cp jacksonBugReproduce-jack213-jar-with-dependencies.jar:./jersey/* JacksonTestServer
# throughput: 1338.5/s mean: 37ms
At first, I thought that a specific jar in Spark was causing the extra delay. But after I randomly split the Spark jars into two parts and imported only one part, performance improved compared to importing all of them. I repeated the split several times and found that the delay is proportional to the number of imported jars.
I also used Arthas to capture some profiles, which I hope will help locate the problem: profilers.zip
Expected behavior Reduce the extra serialization time caused by importing jar packages
Additional context Add any other context about the problem here.
Issue Analytics
- State:
- Created a year ago
- Comments:12 (8 by maintainers)
Top GitHub Comments
I raised https://issues.apache.org/jira/browse/SPARK-40911 and https://github.com/FasterXML/jackson-module-scala/issues/610
@cowtowncoder Thanks for such a quick reply. Because of the dev machine's settings, I can only try to get the flame graphs for 2.14.0-rc1: profilers-214rc1.zip
I tried to get a new group of flame graphs, including rc2's: flamegraphs.zip