Я рассматриваю Apache Ignite для использования в качестве быстрой базы данных. Производительность очень важна: мне нужно заполнить базу как можно быстрее при имеющихся ресурсах. Сначала я копирую все записи (450 млн) из исходной тестовой базы данных в кеши Ignite через IgniteDataStreamer, используя первичный ключ (PK) в качестве ключа. База данных не помещается в память, поэтому у меня включено сохранение на диск (persistence) и отключено вытеснение. Данные вставляются параллельно в 8 потоков. Всю работу выполняет один, но довольно мощный ПК под Windows; отдельного кластера Ignite нет. Восстановление кеша меня не интересует, поэтому WAL отключен. Все идет хорошо, пока число записей не достигнет примерно 310 миллионов (2 часа работы). В этот момент Ignite начинает захлебываться: вставки замедляются, а затем останавливаются с исключениями. Исключение вызывается срабатыванием настройки systemWorkerBlockedTimeout, установленной на 5 минут. Увеличение этого времени совсем не помогает. Основываясь на дампе кучи, я попытался добавить -DIGNITE_PAGES_LIST_DISABLE_ONHEAP_CACHING=true — сбой произошел немного позже, но задание все равно не удалось завершить. Я прочитал руководства по производительности и попытался настроить другие параметры Ignite, но не заметил никакого эффекта. Как узнать, какой воркер блокируется и почему?
Спасибо
Tomasz Grygo
Java 1.8.0_231
Apache Ignite 2.8.1
Windows 10, 64G memory
2020-05-27 21:54:26,176 [Storage2 ] [ERROR] - DTR_0030 worker Storage2 had error: FATAL ERROR java.lang.IllegalStateException: Data streamer has been closed.
java.lang.IllegalStateException: Data streamer has been closed.
at org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.closedException(DataStreamerImpl.java:1095)
at org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.lock(DataStreamerImpl.java:446)
at org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.addDataInternal(DataStreamerImpl.java:646)
at org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.addDataInternal(DataStreamerImpl.java:631)
at org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.addData(DataStreamerImpl.java:753)
at com.sc.extr.cache.PureIgniteDynamicRowStorage.putIfAbsent(PureIgniteDynamicRowStorage.java:83)
at com.sc.extr.cache.PureIgniteDynamicRowStorage.addRowOnKey(PureIgniteDynamicRowStorage.java:160)
at com.sc.extr.tree.MultiCacheTreeBuilder.addRootRowToCache(MultiCacheTreeBuilder.java:409)
at com.sc.extr.tree.MultiCacheTreeBuilder.parentRev1to1(MultiCacheTreeBuilder.java:237)
at com.sc.extr.tree.MultiCacheTreeBuilder.addRowToCache(MultiCacheTreeBuilder.java:333)
at com.sc.extr.tree.MultiCacheTreeBuilder.parentRev(MultiCacheTreeBuilder.java:274)
at com.sc.extr.tree.MultiCacheTreeBuilder.addRow(MultiCacheTreeBuilder.java:379)
at com.sc.extr.tree.MultiCacheTreeBuilder.process(MultiCacheTreeBuilder.java:206)
at com.sc.bi.workflow.WorkTransformer.processOne(WorkTransformer.java:84)
at com.sc.bi.workflow.WorkTransformer.doWork(WorkTransformer.java:145)
at com.sc.bi.workflow.WorkTransformer.processQueue(WorkTransformer.java:210)
at com.sc.bi.workflow.WorkTransformer.run(WorkTransformer.java:169)
Caused by: class org.apache.ignite.IgniteCheckedException: Data streamer has been cancelled: DataStreamerImpl [bufLdrSzPerThread=4096, rcvr=org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl$IsolatedUpdater@381b03ed, ioPlcRslvr=null, cacheName=PERSON.PTINTN, bufSize=512, parallelOps=0, timeout=-1, autoFlushFreq=0, bufMappings=ConcurrentHashMap {03e74462-12ec-4140-b9fb-a975572ac3bb=Buffer [node=TcpDiscoveryNode [id=03e74462-12ec-4140-b9fb-a975572ac3bb, consistentId=b01eb38b-7728-4e43-a697-0bc52f872e44, addrs=ArrayList [127.0.0.1, 172.27.179.112], sockAddrs=HashSet [SOFTBI-DEV.sc.com/172.27.179.112:47500, /127.0.0.1:47500], discPort=47500, order=1, intOrder=1, lastExchangeTime=1590614830815, loc=true, ver=2.8.1#20200521-sha1:86422096, isClient=false], isLocNode=true, idGen=0, sem=java.util.concurrent.Semaphore@2a869d9[Permits = 64], perNodeParallelOps=64, entriesCnt=2048, locFutsSize=0, reqsSize=0]}, cacheObjProc=GridProcessorAdapter [], cacheObjCtx=org.apache.ignite.internal.processors.cache.CacheObjectContext@2a5313b0, cancelled=true, cancellationReason=null, failCntr=0, activeFuts=GridConcurrentHashSet [GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=2102798044], GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=1195632760], GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=370791970], GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=420732031], GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=1453517070]], jobPda=null, depCls=null, fut=DataStreamerFuture [super=GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=1165180540]], publicFut=IgniteFuture [orig=DataStreamerFuture [super=GridFutureAdapter [ignoreInterrupts=false, state=INIT, res=null, hash=1165180540]]], disconnectErr=null, closed=true, lastFlushTime=1590629894701, skipStore=false, keepBinary=false, maxRemapCnt=32, 
remapSem=java.util.concurrent.Semaphore@6e6f060b[Permits = 2147483647], remapOwning=false]
at org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.closeEx(DataStreamerImpl.java:1347)
at org.apache.ignite.internal.processors.datastreamer.DataStreamerImpl.closeEx(DataStreamerImpl.java:1318)
at org.apache.ignite.internal.processors.datastreamer.DataStreamProcessor.onKernalStop(DataStreamProcessor.java:155)
at org.apache.ignite.internal.IgniteKernal.stop0(IgniteKernal.java:2551)
at org.apache.ignite.internal.IgniteKernal.stop(IgniteKernal.java:2499)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop0(IgnitionEx.java:2650)
at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.stop(IgnitionEx.java:2613)
at org.apache.ignite.internal.IgnitionEx.stop(IgnitionEx.java:339)
at org.apache.ignite.failure.StopNodeFailureHandler$1.run(StopNodeFailureHandler.java:36)
at java.lang.Thread.run(Thread.java:748)
Ignite config
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans.xsd">
<bean class="org.apache.ignite.configuration.IgniteConfiguration">
<property name="gridLogger">
<bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
<constructor-arg type="java.lang.String" value="log4j2.xml"/>
</bean>
</property>
<property name="communicationSpi">
<bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
<!-- Override local port. -->
<property name="localPort" value="47400"/>
</bean>
</property>
<!-- Configure internal thread pool. 64-->
<property name="publicThreadPoolSize" value="16"/>
<!-- Configure system thread pool. 32-->
<property name="systemThreadPoolSize" value="8"/>
<property name="systemWorkerBlockedTimeout" value="#{5 * 60 * 1000}"/>
<property name="failureHandler">
<bean class="org.apache.ignite.failure.StopNodeFailureHandler">
<!-- uncomment to enable this handler to
process critical workers' hang-ups -->
<property name="ignoredFailureTypes">
<list>
</list>
</property>
</bean>
</property>
<!-- Set to true to enable distributed class loading for examples, default is false. -->
<property name="peerClassLoadingEnabled" value="false"/>
<property name="dataStorageConfiguration">
<bean class="org.apache.ignite.configuration.DataStorageConfiguration">
<!--
Sets a path to the root directory where data and indexes are
to be persisted. It's assumed the directory is on a separated SSD.
-->
<property name="storagePath" value="persistence"/>
<!--
Sets a path to the directory where WAL is stored.
It's assumed the directory is on a separated HDD.
-->
<property name="walPath" value="wal"/>
<!--
Sets a path to the directory where WAL archive is stored.
The directory is on the same HDD as the WAL.
-->
<property name="walArchivePath" value="wal/archive"/>
<!-- Changing WAL Mode. -->
<property name="walMode" value="NONE"/>
<!-- Set the page size to 4 KB, default -->
<!-- limit 1 KB - 16 KB -->
<property name="pageSize" value="#{4 * 1024}"/>
<!-- Write throttling is disabled here (set to true to enable it). -->
<property name="writeThrottlingEnabled" value="false"/>
<property name="checkpointFrequency" value="500"/>
<property name="lockWaitTime" value="2000"/>
<property name="checkpointThreads" value="1"/>
<property name="checkpointWriteOrder" value="RANDOM"/>
<!--
Default memory region that grows endlessly. A cache is bound to this memory region
unless it sets another one in its CacheConfiguration.
-->
<property name="defaultDataRegionConfiguration">
<bean class="org.apache.ignite.configuration.DataRegionConfiguration">
<!--property name="name" value="Default_Region"/-->
<property name="name" value="default"/>
<!-- 100 MB memory region with disabled eviction -->
<property name="initialSize" value="#{100L * 1024 * 1024}"/>
<!-- maxSize 20 MB is too little -->
<!-- sum of all maxSize values has to be less than total memory of the system -->
<!-- limits size in memory, not on disk -->
<!-- default value 1.2GB -->
<property name="maxSize" value="#{2L * 1024 * 1024 * 1024}"/>
<property name="persistenceEnabled" value="true"/>
<!-- Increasing the buffer size to 1 GB. -->
<property name="checkpointPageBufferSize" value="#{1L * 1024 * 1024 * 1024}"/>
</bean>
</property>
<!-- Defining several data regions for different memory regions -->
<property name="dataRegionConfigurations">
<list>
</list>
</property>
</bean>
</property>
<property name="cacheConfiguration">
<list>
<!--bean class="org.apache.ignite.configuration.CacheConfiguration">
<property name="dataRegionName" value="default"/>
<property name="name" value=".ShardDetectorStorage"/>
<property name="onheapCacheEnabled" value="true"/>
</bean>
<bean class="org.apache.ignite.configuration.CacheConfiguration">
<property name="dataRegionName" value="default"/>
<property name="name" value=".ChildrenStore"/>
<property name="onheapCacheEnabled" value="true"/>
</bean>
<bean class="org.apache.ignite.configuration.CacheConfiguration">
<property name="dataRegionName" value="default"/>
<property name="name" value=".ChildrenStore.listsize"/>
<property name="onheapCacheEnabled" value="true"/>
</bean>
<bean class="org.apache.ignite.configuration.CacheConfiguration">
<property name="dataRegionName" value="default"/>
<property name="name" value=".RootStorage"/>
<property name="onheapCacheEnabled" value="true"/>
</bean>
<bean class="org.apache.ignite.configuration.CacheConfiguration">
<property name="dataRegionName" value="default"/>
<property name="name" value=".QualifierStorage"/>
<property name="onheapCacheEnabled" value="true"/>
</bean-->
</list>
</property>
</bean>
</beans>
Настройки Java
-Xms1024m -Xmx50g -Xss1024m
-Xverify:none
-server
-DIGNITE_QUIET=true
-XX:+UseG1GC
-XX:+DisableExplicitGC
-Djava.net.preferIPv4Stack=true
-XX:+AlwaysPreTouch
-XX:+ScavengeBeforeFullGC
-XX:+AggressiveOpts
Частичный дамп потоков при замедлении
"db-checkpoint-thread-#54" #99 prio=5 os_prio=0 tid=0x0000000070344800 nid=0x2d54 runnable [0x0000001c5df3e000]
java.lang.Thread.State: RUNNABLE
at org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl$Segment.removePageForReplacement(PageMemoryImpl.java:2398)
at org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl$Segment.access$900(PageMemoryImpl.java:2093)
at org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.acquirePage(PageMemoryImpl.java:773)
at org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.acquirePage(PageMemoryImpl.java:701)
at org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.saveStoreMetadata(GridCacheOffheapManager.java:342)
at org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.syncMetadata(GridCacheOffheapManager.java:268)
at org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.syncMetadata(GridCacheOffheapManager.java:254)
at org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.beforeCheckpointBegin(GridCacheOffheapManager.java:226)
at org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.markCheckpointBegin(GridCacheDatabaseSharedManager.java:4125)
at org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.doCheckpoint(GridCacheDatabaseSharedManager.java:3738)
at org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$Checkpointer.body(GridCacheDatabaseSharedManager.java:3623)
at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120)
at java.lang.Thread.run(Thread.java:748)
Locked ownable synchronizers:
- <0x000000009b71a250> (a java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync)