使用 Protostuff 来序列化、读取和写入文件, 速度很快。 这是我测试过的所有方法中最快的,它可以用作一个简单的本地文件数据库,可以储存二进制,或者纯文本文件,具体实现方式看个人喜好,这里本人采用二进制方式存储文件。 常规文件工具的读写方式有很多种,但是对于简单的读写,目前JAVA NIO方式是最快的。 FileUtil.java package protoBuf; import java.io.*; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.util.List; public class FileUtil { public static final int BUFSIZE = 1024 * 8; /** * 通过追加写入二进制数据 */ public static void writeByte2File(byte[] bytes, String writePath) { try { FileOutputStream fos = new FileOutputStream(writePath, true); fos.write(bytes); fos.flush(); fos.close(); } catch (Exception e) { e.printStackTrace(); } } /** * 添加了在不关闭文件的情况下写入二进制数据 */ public static void writeByte2FileFlush(byte[] bytes, String writePath) { try { FileOutputStream fos = new FileOutputStream(writePath, true); fos.write(bytes); fos.flush(); } catch (Exception e) { e.printStackTrace(); } } /** * Java NIO 模式追加写入 * @param filepath * @param contentList 要写入的文件内容 * @param bufferSize 单次写缓冲区大小 默认4M 1024 * 1024 * 4 */ public static void write2FileChannel(String filepath, List<String> contentList, Integer bufferSize) { bufferSize = null == bufferSize ? 4194304 : bufferSize; ByteBuffer buf = ByteBuffer.allocate(bufferSize); FileChannel channel = null; try { File fileTemp = new File(filepath); File parent = fileTemp.getParentFile(); if (!parent.exists()) parent.mkdirs(); if (!fileTemp.exists()) fileTemp.createNewFile(); channel = new FileOutputStream(filepath, true).getChannel(); for (int i = 0; i < contentList.size(); i++) { buf.put((contentList.get(i) + "\r\n").getBytes()); } buf.flip(); // 切换到可读模式 while (buf.hasRemaining()) { channel.write(buf); } } catch (Exception e) { e.printStackTrace(); } finally { try { channel.close(); } catch (IOException e) { e.printStackTrace(); } } } /** * NIO模式合并文件 */ public static void mergeFiles(File outFile, String[] files) { FileChannel outChannel = null; try { outChannel = new FileOutputStream(outFile).getChannel(); for (String f : files) { if (null != f) { FileChannel fc = new FileInputStream(f).getChannel(); ByteBuffer bb = ByteBuffer.allocate(BUFSIZE); while (fc.read(bb) != -1) { bb.flip(); outChannel.write(bb); bb.clear(); } fc.close(); } } } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (outChannel != null) { outChannel.close(); } } catch (IOException ignore) { } } } /** * 添加二进制数据写入,使用固定流,不关闭文件 */ public static FileOutputStream writeByte2FileFlush2Stream(FileOutputStream fos, byte[] bytes, String writePath) { try { if (fos == null) { fos = new FileOutputStream(writePath, true); } fos.write(bytes); fos.flush(); } catch (Exception e) { e.printStackTrace(); } return fos; } /** * NIO方式一次将文件内容读入内存 */ public static byte[] readDataFromFile(String filePath) throws Exception { //get all data from file RandomAccessFile file = new RandomAccessFile(filePath, "rw"); FileChannel fileChannel = file.getChannel(); MappedByteBuffer buffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); byte res[] = new byte[buffer.capacity()]; buffer.get(res); return res; } public static void main(String[] args) { for(int i=0;i<100;i++){ writeByte2FileFlush(new String("test"+i).getBytes(), "E:\\testNull.txt"); } try{ byte[] file = readDataFromFile("E:\\testNull.txt"); System.out.println("file="+new String(file)); }catch(Exception e){ e.printStackTrace(); } } } 简单的对象包装类 WrapperUtil.java package protoBuf; public class WrapperUtil<T> { private T data; public static <T> WrapperUtil<T> builder(T data) { WrapperUtil<T> wrapper = new WrapperUtil<>(); wrapper.setData(data); return wrapper; } public T getData() { return data; } public void setData(T data) { this.data = data; } } Protostuff 的主要工具类有序列化和反序列化方法,这些方法是加速数据处理的关键。 ProtoBufUtil.java package protoBuf; import com.google.common.collect.Maps; import io.protostuff.LinkedBuffer; import io.protostuff.ProtostuffIOUtil; import io.protostuff.Schema; import io.protostuff.runtime.RuntimeSchema; import org.springframework.objenesis.Objenesis; import org.springframework.objenesis.ObjenesisStd; import java.util.*; import java.util.concurrent.CopyOnWriteArrayList; public class ProtoBufUtil { private static Objenesis objenesis = new ObjenesisStd(true); /** * 需要使用包装类进行序列化/反序列化的类的集合 */ private static final Set<Class<?>> WRAPPER_SET = new HashSet<>(); /** * 序列化/反序列化包装类类对象 */ private static final Class<WrapperUtil> WRAPPER_CLASS = WrapperUtil.class; /** * 序列化/反序列化包装类模式对象 */ private static final Schema<WrapperUtil> WRAPPER_SCHEMA = RuntimeSchema.createFrom(WRAPPER_CLASS); /** * 缓存对象及对象schema信息集合 */ private static final Map<Class<?>, Schema<?>> CACHE_SCHEMA = Maps.newConcurrentMap(); /** * 预定义一些Protostuff无法直接序列化/反序列化的对象 */ static { WRAPPER_SET.add(List.class); WRAPPER_SET.add(ArrayList.class); WRAPPER_SET.add(CopyOnWriteArrayList.class); WRAPPER_SET.add(LinkedList.class); WRAPPER_SET.add(Stack.class); WRAPPER_SET.add(Vector.class); WRAPPER_SET.add(Map.class); WRAPPER_SET.add(HashMap.class); WRAPPER_SET.add(TreeMap.class); WRAPPER_SET.add(Hashtable.class); WRAPPER_SET.add(SortedMap.class); WRAPPER_SET.add(Map.class); WRAPPER_SET.add(Object.class); } public ProtoBufUtil() { } @SuppressWarnings({"unchecked"}) public static <T> byte[] serializer(T obj) { Class<T> cls = (Class<T>) obj.getClass(); LinkedBuffer buffer = LinkedBuffer.allocate(LinkedBuffer.DEFAULT_BUFFER_SIZE); try { Schema<T> schema = getSchema(cls); return ProtostuffIOUtil.toByteArray(obj, schema, buffer); } catch (Exception e) { System.out.println("protobuf serializer fail"); throw new IllegalStateException(e.getMessage(), e); } finally { buffer.clear(); } } public static <T> T deserializer(byte[] bytes, Class<T> clazz) { try { T message = (T) objenesis.newInstance(clazz); Schema<T> schema = getSchema(clazz); ProtostuffIOUtil.mergeFrom(bytes, message, schema); return message; } catch (Exception e) { System.out.println("protobuf deserializer fail"); throw new IllegalStateException(e.getMessage(), e); } } /** * 注册需要使用包装类进行序列化/反序列化的 Class 对象 * * @param clazz 需要包装的类型 Class 对象 */ public static void registerWrapperClass(Class clazz) { WRAPPER_SET.add(clazz); } /** *获取序列化对象类型的schema * * @param cls 序列化对象的class * @param <T> 序列化对象的类型 * @return 序列化对象类型的schema */ @SuppressWarnings({"unchecked", "rawtypes"}) private static <T> Schema<T> getSchema(Class<T> cls) { Schema<T> schema = (Schema<T>) CACHE_SCHEMA.get(cls); if (schema == null) { schema = RuntimeSchema.createFrom(cls); CACHE_SCHEMA.put(cls, schema); } return schema; } /** * 序列化对象 * * @param obj 需要序列化的对象 * @param <T> 序列化对象的类型 * @return 序列化后的二进制数组 */ @SuppressWarnings("unchecked") public static <T> byte[] serializeCollect(T obj) { Class<T> clazz = (Class<T>) obj.getClass(); LinkedBuffer buffer = LinkedBuffer.allocate(LinkedBuffer.DEFAULT_BUFFER_SIZE); try { Object serializeObject = obj; Schema schema = WRAPPER_SCHEMA; if (!WRAPPER_SET.contains(clazz)) { schema = getSchema(clazz); } else { serializeObject = WrapperUtil.builder(obj); } return ProtostuffIOUtil.toByteArray(serializeObject, schema, buffer); } catch (Exception e) { System.out.println("Exception"); throw new IllegalStateException(e.getMessage(), e); } finally { buffer.clear(); } } /** * 反序列化对象 * * @param data 需要反序列化的二进制数组 * @param clazz 反序列化后的对象class * @param <T> 反序列化后的对象类型 * @return 反序列化后的对象集合 * SerializeDeserializeWrapper wrapper = SerializeDeserializeWrapper.builder(list); * byte[] serializeBytes = ProtostuffUtils.serialize(wrapper); * long end4 = System.currentTimeMillis(); * SerializeDeserializeWrapper deserializeWrapper = ProtostuffUtils.deserialize(serializeBytes, SerializeDeserializeWrapper.class); */ public static <T> T deserializeCollect(byte[] data, Class<T> clazz) { try { if (!WRAPPER_SET.contains(clazz)) { T message = clazz.newInstance(); Schema<T> schema = getSchema(clazz); ProtostuffIOUtil.mergeFrom(data, message, schema); return message; } else { WrapperUtil<T> wrapper = new WrapperUtil<T>(); ProtostuffIOUtil.mergeFrom(data, wrapper, WRAPPER_SCHEMA); return wrapper.getData(); } } catch (Exception e) { System.out.println("deserialize exception"); throw new IllegalStateException(e.getMessage(), e); } } public static byte[] subBytes(byte[] src, int begin, int count) { byte[] bs = new byte[count]; for (int i = begin; i < begin + count; i++) bs[i - begin] = src[i]; return bs; } public static byte[] intToByteArray(int i) { byte[] result = new byte[4]; result[0] = (byte) ((i >> 24) & 0xFF); result[1] = (byte) ((i >> 16) & 0xFF); result[2] = (byte) ((i >> 8) & 0xFF); result[3] = (byte) (i & 0xFF); return result; } public static int byteArrayToInt(byte[] bytes) { int value = 0; for (int i = 0; i < 4; i++) { int shift = (3 - i) * 8; value += (bytes[i] & 0xFF) << shift; } return value; } } 测试类自定义了一个简单的数据结构来写入文件、读取文件、反序列化数据并将该数据放入 JAVA 对象中。 Protostuff 目前对于这个操作序列是最快的,并且对于数百万和数千万的数据量来说非常快。 ProtoUsage.java package protoBuf; import java.io.File; import java.io.FileOutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; public class ProtoUsage { public static final String filePath = "E:\\testByte"; //params: list for test public static void writeByte2File(List<Product> prodList){ try{ if(new File(filePath).exists()){ new File(filePath).delete(); } FileOutputStream fos = new FileOutputStream(filePath, true); for (Product prod : prodList) { byte data[] = ProtoBufUtil.serializer(prod); byte dataLeng[] = ProtoBufUtil.intToByteArray(data.length); FileUtil.writeByte2FileFlush2Stream(fos, dataLeng, filePath); FileUtil.writeByte2FileFlush2Stream(fos, data, filePath); } }catch(Exception e){ e.printStackTrace(); } } public static void main(String[] args) { try{ int testCount=5000000; List<Product> prodList = new ArrayList<Product>(); for(int i=0;i<testCount;i++){ Product prod = new Product(); prod.setId("product="+i); prod.setName("product has a test name: testNo("+i+")"); prodList.add(prod); } long start = System.currentTimeMillis(); //将测试数据写入文件 writeByte2File(prodList); System.out.println("Write data time cost:"+(System.currentTimeMillis()-start)); //开始读取文件。 // ProtoStuff最大的优势就是非常快的序列化和反序列化速度, // 这节省了程序的数据处理时间。 . //首先一次读取所有数据 long treatStart = System.currentTimeMillis(); byte res[] = FileUtil.readDataFromFile(filePath); List<Product> resultProd = new ArrayList<Product>(); //二进制文件数据结构 // 0016testtesttesttest0018testestestestteste //0016(保存的此数据的长度)testtesttesttest0018(保存的此数据的长度)testestestestteste int hasRead = 0;//处理的数据量 byte length[] = new byte[4];//单个数据对象的长度 while (res.length != hasRead) { length[0] = res[0 + hasRead]; length[1] = res[1 + hasRead]; length[2] = res[2 + hasRead]; length[3] = res[3 + hasRead]; hasRead += 4; int dataLength = ProtoBufUtil.byteArrayToInt(length); byte finalByte[] = ProtoBufUtil.subBytes(res, hasRead, dataLength); Product prod = ProtoBufUtil.deserializer(finalByte, Product.class); resultProd.add(prod); hasRead += dataLength; } System.out.println("Read and treat Cost time:"+(System.currentTimeMillis()-treatStart)); System.out.println("list size:"+resultProd.size()); // resultProd.forEach(System.out::println); }catch(Exception e){ e.printStackTrace(); } } } //Object for test class Product{ String id; String name; public String getId() { return id; } public void setId(String id) { this.id = id; } public String getName() { return name; } public void setName(String name) { this.name = name; } @Override public String toString() { return "name: " + this.getName() + ", id: " + this.getId(); } } 下面是测试结果。 我测试了 500 万数据。 该文件仅写入一次。 我们在测试中使用了传统的文件写入方式,因为在真实场景中一次写入文件的操作非常少。 使用NIO或其他方式会更快。 可以看到,读出500万条数据,并逐条反序列化所花费的时间只有3秒,这就是Protostuff 的优势所在了,在一些只需要小型本地文件存储的地方,读写和处理速度会非常快,非常有用。 Write data time cost:20914 毫秒 Read and treat Cost time:3142 毫秒 list size:5000000 文章导航 The php wordpress and java platform website under tomcat share apache, single server, dual domain name configuration. Use Google Protostuff to serialize and deserialize,read and write files very fast.