最节省 CPU 和内存的解析方式是使用面向流的解析,而不是对象映射。这通常需要编写更多的代码,但一般来说是值得的 :) Gson 和 Jackson 都支持这种轻量级技术。另外,您应该避免在主路径(热路径)中分配内存,以防止 GC 暂停。为了说明这个思路,我使用了一个小型的无 GC 库 green-jelly(https://github.com/anatolygudkov/green-jelly):
import org.green.jelly.*;
import java.io.CharArrayReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
/**
 * Demonstrates stream-oriented (event-based) JSON parsing with green-jelly:
 * the document is fed to the parser chunk by chunk, and a listener picks out
 * the single customer whose {@code id} equals a given value, without mapping
 * the whole document to objects.
 */
public class SelectById {

    /**
     * Mutable holder for the customer fields this example extracts
     * ({@code id}, {@code name}, {@code email}). One instance is reused as a
     * scratch object for every JSON object seen by the listener.
     */
    public static class Customer {
        private long id;
        private String name;
        private String email;

        /** Resets all fields so the instance can be reused for the next JSON object. */
        public void clear() {
            id = 0;
            name = null;
            email = null;
        }

        /**
         * Creates an independent snapshot of the current field values.
         *
         * @return a new {@code Customer} carrying the same id, name and email
         */
        public Customer makeCopy() {
            Customer result = new Customer();
            result.id = id;
            result.name = name;
            result.email = email;
            return result;
        }

        @Override
        public String toString() {
            return "Customer{" +
                    "id=" + id +
                    ", name='" + name + '\'' +
                    ", email='" + email + '\'' +
                    '}';
        }
    }

    /**
     * Parses an in-memory sample document and prints the list of customers
     * whose id equals the hard-coded selection id (at most one, because
     * parsing stops at the first match).
     *
     * @param args ignored
     * @throws Exception if reading from the underlying {@link Reader} fails
     */
    public static void main(String[] args) throws Exception {
        final String file = "\n" +
                "[\n" +
                " {\n" +
                " \"id\": 1,\n" +
                " \"name\": \"Mark Robb\",\n" +
                " \"last_login\": \"2013-01-21T05:13:41 -11:30\",\n" +
                " \"email\": \"[email protected] /cdn-cgi/l/email-protection\",\n" +
                " \"phone\": \"12345\",\n" +
                " \"locations\": [\n" +
                " \"Germany\",\n" +
                " \"Austria\"\n" +
                " ]\n" +
                "},\n" +
                " {\n" +
                " \"id\": 2,\n" +
                " \"name\": \"Matt Nish\",\n" +
                " \"last_login\": \"2014-02-21T07:10:41 -11:30\",\n" +
                " \"email\": \"[email protected] /cdn-cgi/l/email-protection\",\n" +
                " \"phone\": \"456123\",\n" +
                " \"locations\": [\n" +
                " \"France\",\n" +
                " \"Italy\"\n" +
                " ]\n" +
                " }\n" +
                "]\n";

        final List<Customer> selection = new ArrayList<>();
        final long selectionId = 2;

        final JsonParser parser = new JsonParser().setListener(
                new JsonParserListenerAdaptor() {
                    // Scratch object reused for every JSON object; copied only on a match.
                    private final Customer customer = new Customer();
                    // Name of the most recently seen object member; null until one is seen.
                    private String currentField;

                    @Override
                    public boolean onObjectStarted() {
                        // NOTE(review): every object start resets the scratch customer; if the
                        // parser also reports nested objects here, they would reset it too.
                        // The sample data only contains flat objects.
                        customer.clear();
                        return true;
                    }

                    @Override
                    public boolean onObjectMember(final CharSequence name) {
                        currentField = name.toString();
                        return true;
                    }

                    @Override
                    public boolean onStringValue(final CharSequence data) {
                        // Constant-first equals() is null-safe: a string value that arrives
                        // before any member name was seen is simply ignored instead of
                        // failing with a NullPointerException as a switch on null would.
                        if ("name".equals(currentField)) {
                            customer.name = data.toString();
                        } else if ("email".equals(currentField)) {
                            customer.email = data.toString();
                        }
                        return true;
                    }

                    @Override
                    public boolean onNumberValue(final JsonNumber number) {
                        if ("id".equals(currentField)) {
                            // NOTE(review): only the mantissa is read; presumably a number
                            // written with an exponent or fraction would not round-trip here.
                            // Confirm against JsonNumber if ids can be non-integers.
                            customer.id = number.mantissa();
                        }
                        return true;
                    }

                    @Override
                    public boolean onObjectEnded() {
                        if (customer.id == selectionId) {
                            selection.add(customer.makeCopy());
                            return false; // we don't need to continue
                        }
                        return true;
                    }
                }
        );

        // now let's read and parse the data with a buffer
        final CharArrayCharSequence buffer = new CharArrayCharSequence(1024);
        try (final Reader reader = new CharArrayReader(file.toCharArray())) { // replace by FileReader, for example
            int len;
            // Stop reading as soon as the match is found: once the listener has asked the
            // parser to stop, feeding it further chunks is at best wasted I/O.
            while (selection.isEmpty() && (len = reader.read(buffer.getChars())) != -1) {
                buffer.setLength(len);
                parser.parse(buffer);
            }
        }
        // Signal end-of-JSON only when the whole input was actually consumed; after an
        // early stop the document was deliberately abandoned mid-way.
        if (selection.isEmpty()) {
            parser.eoj();
        }
        System.out.println(selection);
    }
}
在 Java 中,这种方式的运行速度应该已经接近极限(考虑到我们无法直接使用 SIMD 指令)。要完全消除主路径中的内存分配(以及由此引起的 GC 暂停),您必须把“.toString()”(它每次都会创建一个新的 String 实例)替换为可重用的对象,例如 StringBuilder。
最后一个可能影响整体性能的因素是读取文件的方式。RandomAccessFile 是 Java 中最好的选择之一。由于您的数据编码看起来是 ASCII,只需将每个字节直接转换为 char,即可传递给 JsonParser。