Неожиданное исключение SIGBUS - PullRequest
1 голос
/ 25 февраля 2020

Мы получаем следующее исключение

[17:11:44,287][SEVERE][checkpoint-runner-#59][] Critical system error detected. Will be handled accordingly to configured handler [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0, super=AbstractFailureHandler [ignoredFailureTypes=[SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT]]], failureCtx=FailureContext [type=CRITICAL_ERROR, err=class o.a.i.i.processors.cache.persistence.StorageException: Failed to write page [file=/home/test/Documents/AdityaISearchTest/gridgain-community-8.7.7/work/db/node00-951ca1c1-fabe-42b9-bbbe-079a9e39141e/cache-GLOBAL/part-455.bin, pageId=283429186830339, tag=1]]]
class org.apache.ignite.internal.processors.cache.persistence.StorageException: Failed to write page [file=/home/test/Documents/AdityaISearchTest/gridgain-community-8.7.7/work/db/node00-951ca1c1-fabe-42b9-bbbe-079a9e39141e/cache-GLOBAL/part-455.bin, pageId=283429186830339, tag=1]
    at org.apache.ignite.internal.processors.cache.persistence.file.FilePageStore.write(FilePageStore.java:607)
    at org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager.writeInternal(FilePageStoreManager.java:535)
    at org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$WriteCheckpointPages$1.writePage(GridCacheDatabaseSharedManager.java:4516)
    at org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.copyPageForCheckpoint(PageMemoryImpl.java:1296)
    at org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryImpl.checkpointWritePage(PageMemoryImpl.java:1211)
    at org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$WriteCheckpointPages.writePages(GridCacheDatabaseSharedManager.java:4479)
    at org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager$WriteCheckpointPages.run(GridCacheDatabaseSharedManager.java:4405)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: class org.apache.ignite.IgniteCheckedException: No space left on device
    at org.apache.ignite.internal.processors.cache.persistence.file.AsyncFileIO.write(AsyncFileIO.java:148)
    at org.apache.ignite.internal.processors.cache.persistence.file.AbstractFileIO$5.run(AbstractFileIO.java:117)
    at org.apache.ignite.internal.processors.cache.persistence.file.AbstractFileIO.fully(AbstractFileIO.java:53)
    at org.apache.ignite.internal.processors.cache.persistence.file.AbstractFileIO.writeFully(AbstractFileIO.java:115)
    at org.apache.ignite.internal.processors.cache.persistence.file.FilePageStore.write(FilePageStore.java:570)
    ... 9 more
Caused by: class org.apache.ignite.IgniteCheckedException: No space left on device
    at org.apache.ignite.internal.util.IgniteUtils.cast(IgniteUtils.java:7324)
    at org.apache.ignite.internal.util.future.GridFutureAdapter.resolve(GridFutureAdapter.java:260)
    at org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:190)
    at org.apache.ignite.internal.util.future.GridFutureAdapter.getUninterruptibly(GridFutureAdapter.java:145)
    at org.apache.ignite.internal.processors.cache.persistence.file.AsyncFileIO.write(AsyncFileIO.java:145)
    ... 13 more
Caused by: java.io.IOException: No space left on device
    at sun.nio.ch.FileDispatcherImpl.pwrite0(Native Method)
    at sun.nio.ch.FileDispatcherImpl.pwrite(FileDispatcherImpl.java:66)
    at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:89)
    at sun.nio.ch.IOUtil.write(IOUtil.java:51)
    at sun.nio.ch.SimpleAsynchronousFileChannelImpl$3.run(SimpleAsynchronousFileChannelImpl.java:374)
    ... 3 more

Исключение, выброшенное клиентским приложением:

"" "172.24.112.126" "test" "" "" "" "8688" "com.zoho.scrapy.common.ignite.IgniteUtil" "zaddPipeline" "SEVERE" "25-02-2020 00:19:16:457" "40" "" "com.zoho.scrapy.server.ignite.ScrapyIgniteException: IGNITE EXCEPTION :: Exception while sql bulk setting key, value to Ignite db - URLS. Exception -
        at com.zoho.scrapy.server.ignite.IgniteConnectionHandler$CacheHolder.sqlPipelinedSet(IgniteConnectionHandler.java:385)
        at com.zoho.scrapy.common.ignite.IgniteUtil.zaddPipeline(IgniteUtil.java:384)
        at edu.uci.ics.crawler4j.frontier.WorkQueues.putAll(WorkQueues.java:157)
        at edu.uci.ics.crawler4j.frontier.Frontier.scheduleAll(Frontier.java:126)
        at edu.uci.ics.crawler4j.crawler.WebCrawler.processPage(WebCrawler.java:537)
        at edu.uci.ics.crawler4j.crawler.WebCrawler.run(WebCrawler.java:291)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.sql.SQLException: Failed to communicate with Ignite cluster.
        at org.apache.ignite.internal.jdbc.thin.JdbcThinConnection.sendRequest(JdbcThinConnection.java:819)
        at org.apache.ignite.internal.jdbc.thin.JdbcThinStatement.execute0(JdbcThinStatement.java:226)
        at org.apache.ignite.internal.jdbc.thin.JdbcThinPreparedStatement.executeWithArguments(JdbcThinPreparedStatement.java:251)
        at org.apache.ignite.internal.jdbc.thin.JdbcThinPreparedStatement.execute(JdbcThinPreparedStatement.java:239)
        at com.zoho.scrapy.server.ignite.IgniteQuery$URLS_OBJECT.sqlBulkInsert(IgniteQuery.java:320)
        at com.zoho.scrapy.server.ignite.IgniteQuery.executeFieldsQuery(IgniteQuery.java:63)
        at com.zoho.scrapy.server.ignite.IgniteConnectionHandler$CacheHolder.sqlPipelinedSet(IgniteConnectionHandler.java:382)
        ... 6 more
Caused by: java.io.IOException: Failed to read incoming message (not enough data).
        at org.apache.ignite.internal.jdbc.thin.JdbcThinTcpIo.read(JdbcThinTcpIo.java:659)
        at org.apache.ignite.internal.jdbc.thin.JdbcThinTcpIo.read(JdbcThinTcpIo.java:637)
        at org.apache.ignite.internal.jdbc.thin.JdbcThinTcpIo.readResponse(JdbcThinTcpIo.java:559)
        at org.apache.ignite.internal.jdbc.thin.JdbcThinTcpIo.sendRequest(JdbcThinTcpIo.java:529)
        at org.apache.ignite.internal.jdbc.thin.JdbcThinConnection.sendRequest(JdbcThinConnection.java:798)

Мы хотели бы знать, почему возникает эта проблема.

1) Персистентность на диске включена. 2) Это произошло в тестовой среде, где клиент и сервер находятся на одной машине. 3) В момент возникновения проблемы клиентский узел вывел следующие метрики:

 ^-- Node [id=ff1b3829, uptime=03:04:00.904]
    ^-- H/N/C [hosts=1, nodes=2, CPUs=8]
    ^-- CPU [cur=0.03%, avg=5.46%, GC=0%]
    ^-- PageMemory [pages=0]
    ^-- Heap [used=338MB, free=90.45%, comm=1147MB]
    ^-- Off-heap [used=0MB, free=-1%, comm=0MB]
    ^-- Outbound messages queue [size=0]
    ^-- Public thread pool [active=0, idle=0, qSize=0]
    ^-- System thread pool [active=0, idle=0, qSize=0]"

4) При проверке диска он по-прежнему показывает 37% свободного места. 5) Метрики серверного узла выглядят следующим образом (клиент к этому моменту отключился):

 ^-- Node [id=db7d9a8d, uptime=00:19:00.064]
    ^-- H/N/C [hosts=1, nodes=1, CPUs=8]
    ^-- CPU [cur=0.17%, avg=0.25%, GC=0%]
    ^-- PageMemory [pages=229594]
    ^-- Heap [used=191MB, free=94.58%, comm=358MB]
    ^-- Off-heap [used=907MB, free=73.96%, comm=3384MB]
    ^--   sysMemPlc region [used=0MB, free=99.99%, comm=100MB]
    ^--   default region [used=907MB, free=71.51%, comm=3184MB]
    ^--   metastoreMemPlc region [used=0MB, free=99.96%, comm=0MB]
    ^--   TxLog region [used=0MB, free=100%, comm=100MB]
    ^-- Ignite persistence [used=951MB]
    ^--   sysMemPlc region [used=0MB]
    ^--   default region [used=951MB]
    ^--   metastoreMemPlc region [used=0MB]
    ^--   TxLog region [used=0MB]
    ^-- Outbound messages queue [size=0]
    ^-- Public thread pool [active=0, idle=0, qSize=0]
    ^-- System thread pool [active=0, idle=6, qSize=0]

6) В журнале ошибок Ignite обнаружена проблема. Часть его содержимого приведена ниже:

---------------  T H R E A D  ---------------

Current thread (0x00007f5478007000):  JavaThread "client-connector-#71" [_thread_in_Java, id=15658, stack(0x00007f533fee6000,0x00007f533ffe7000)]

siginfo: si_signo: 7 (SIGBUS), si_code: 2 (BUS_ADRERR), si_addr: 0x00007f533d4f5000

Registers:
RAX=0x000000076ef45cc8, RBX=0x0000000000001000, RCX=0x00007f533d4f5690, RDX=0xffffffffffffff30
RSP=0x00007f533ffe45e0, RBP=0x00007f533ffe45e0, RSI=0x00007f533d4f4698, RDI=0x000000076ef45cc0
R8 =0x00007f533a432000, R9 =0x0000000000000001, R10=0x00007f556d052e20, R11=0x0000000000001000
R12=0x0000000000000000, R13=0x00000000030c2698, R14=0x000000076ef37330, R15=0x00007f5478007000
RIP=0x00007f556d052842, EFLAGS=0x0000000000010296, CSGSFS=0x002b000000000033, ERR=0x0000000000000006
  TRAPNO=0x000000000000000e

7) В файле конфигурации сервера задана следующая конфигурация IgniteConfiguration:

<bean abstract="true" id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">

        <property name="peerClassLoadingEnabled" value="true"/>
    <property name="clientMode" value="false"/>
    <property name="queryThreadPoolSize" value="16" />
    <property name="failureDetectionTimeout" value="300000"/>
    <property name="clientFailureDetectionTimeout" value="300000"/>

      <property name="dataStorageConfiguration">
        <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
          <property name="writeThrottlingEnabled" value="true"/>
          <property name="defaultDataRegionConfiguration">
        <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
          <property name="persistenceEnabled" value="true"/>
        </bean>
          </property>
        </bean>
      </property>
</bean>

8) Вывод df -kh

Filesystem      Size  Used Avail Use% Mounted on
udev            7.8G     0  7.8G   0% /dev
tmpfs           1.6G   50M  1.6G   4% /run
/dev/dm-0        96G  8.2G   86G   9% /
tmpfs           7.8G   25M  7.8G   1% /dev/shm
tmpfs           5.0M  4.0K  5.0M   1% /run/lock
tmpfs           7.8G     0  7.8G   0% /sys/fs/cgroup
/dev/nvme0n1p4  954M  422M  441M  49% /boot
/dev/nvme0n1p3  976M  3.5M  972M   1% /boot/efi
/dev/dm-2        97G   17M   96G   1% /personal
/dev/dm-1       284G  181G  104G  64% /home
tmpfs           1.6G   92K  1.6G   1% /run/user/1001

9) Вывод df -i

Filesystem      Inodes IUsed   IFree IUse% Mounted on
udev           2030170   562 2029608    1% /dev
tmpfs          2037855   857 2036998    1% /run
/dev/dm-0            0     0       0     - /
tmpfs          2037855    36 2037819    1% /dev/shm
tmpfs          2037855     8 2037847    1% /run/lock
tmpfs          2037855    17 2037838    1% /sys/fs/cgroup
/dev/nvme0n1p4       0     0       0     - /boot
/dev/nvme0n1p3       0     0       0     - /boot/efi
/dev/dm-2            0     0       0     - /personal
/dev/dm-1            0     0       0     - /home
tmpfs          2037855    36 2037819    1% /run/user/1001
...