Разбор HTML с непонятным форматированием имен классов - PullRequest
0 голосов
/ 22 октября 2018

HTML-файл выглядит следующим образом:

<style>.sysinfoTable, 
.sysinfoTable table {
	width: 100%;
	border-spacing: 0px 2px;
}
.sysinfoTable td {
	padding:2px 0 2px 5px;
}
.sysinfoTable .sysinfoTableCategoryHeader {
	font-weight: bold;
}
.sysinfoTable .sysinfoTableDeviceHeader {
	font-weight: bold;
}
.sysinfoTable .sysinfoTablePropertyEven {
}
.sysinfoTable .sysinfoTablePropertyOdd {
}
.sysinfoTable .sysinfoTablePropertyKey {
	font-weight: bold;
}
.sysinfoTable .sysinfoTablePropertyValue {
}

.sysinfoTable .sysinfoTableCategoryHeader {
	background-color: #def;
}
.sysinfoTable .sysinfoTableDeviceHeader {
	background-color: #fec;
}
.sysinfoTable .sysinfoTablePropertyEven {
}
.sysinfoTable .sysinfoTablePropertyOdd {
	background-color: #eee;
}
.sysinfoTable .sysinfoTablePropertyKey {
}
.sysinfoTable .sysinfoTablePropertyValue {
}
</style>

<table class="sysinfoTable">
	<tr class="sysinfoTableCategoryHeader">
		<td colspan="4">Operating System</td>
	</tr>
	<tr class="sysinfoTablePropertyEven">
		<td />
		<td />
		<td><span class="sysinfoTablePropertyKey">Operating System Name</span></td>
		<td><span class="sysinfoTablePropertyValue">Linux</span></td>
	</tr>
	<tr class="sysinfoTablePropertyOdd">
		<td />
		<td />
		<td><span class="sysinfoTablePropertyKey">Kernel Version</span></td>
		<td><span class="sysinfoTablePropertyValue">4.8.0-1-amd64</span></td>
	</tr>
	<tr class="sysinfoTablePropertyEven">
		<td />
		<td />
		<td><span class="sysinfoTablePropertyKey">Kernel Date</span></td>
		<td><span class="sysinfoTablePropertyValue">#1 SMP Debian 4.8.7-1 (2016-11-13)</span></td>
	</tr>
	<tr class="sysinfoTablePropertyOdd">
		<td />
		<td />
		<td><span class="sysinfoTablePropertyKey">Architecture Name</span></td>
		<td><span class="sysinfoTablePropertyValue">x86_64</span></td>
	</tr>
	<tr class="sysinfoTablePropertyEven">
		<td />
		<td />
		<td><span class="sysinfoTablePropertyKey">Native Architecture Type</span></td>
		<td><span class="sysinfoTablePropertyValue">64-bit</span></td>
	</tr>
	<tr class="sysinfoTablePropertyOdd">
		<td />
		<td />
		<td><span class="sysinfoTablePropertyKey">Distribution Information</span></td>
		<td><span class="sysinfoTablePropertyValue">os-release : PRETTY_NAME=&quot;Debian GNU/Linux stretch/sid&quot; NAME=&quot;Debian GNU/Linux&quot; ID=debian HOME_URL=&quot;https://www.debian.org/&quot; SUPPORT_URL=&quot;https://www.debian.org/support&quot; BUG_REPORT_URL=&quot;https://bugs.debian.org/&quot;  / debian_version : stretch/sid</span></td>
	</tr>
	<tr class="sysinfoTableCategoryHeader">
		<td colspan="4">Motherboard</td>
	</tr>
	<tr class="sysinfoTablePropertyEven">
		<td />
		<td />
		<td><span class="sysinfoTablePropertyKey">Manufacturer</span></td>
		<td><span class="sysinfoTablePropertyValue">Apple Inc.</span></td>
	</tr>
	<tr class="sysinfoTablePropertyOdd">
		<td />
		<td />
		<td><span class="sysinfoTablePropertyKey">Product</span></td>
		<td><span class="sysinfoTablePropertyValue">iMac14,4</span></td>
	</tr>
....
</table>	

Теперь мне нужно разобрать его на пары «ключ — значение». Для этого я воспользовался библиотекой HTML Agility Pack и написал следующий код:

 ' Collect every "key:value" pair from the property rows of the sysinfo table and show them.
        ' Both the "Even" and the "Odd" row classes are selected: the rows alternate between the
        ' two classes, so matching only "Even" would skip every second pair.
        Dim htmlNodes = ob1.DocumentNode.SelectNodes("//body/div/table/tr[@class='sysinfoTablePropertyEven' or @class='sysinfoTablePropertyOdd']")
        Dim pairs As String
        Dim collected As New System.Text.StringBuilder()
        Try
            ' SelectNodes returns Nothing (not an empty collection) when no node matches,
            ' so guard before iterating.
            If htmlNodes IsNot Nothing Then
                For Each row In htmlNodes
                    ' A relative XPath (leading ".") searches inside the current row only,
                    ' so the row's markup does not have to be re-parsed into a second document.
                    Dim keyNode = row.SelectSingleNode(".//td/span[@class='sysinfoTablePropertyKey']")
                    Dim valueNode = row.SelectSingleNode(".//td/span[@class='sysinfoTablePropertyValue']")

                    ' Skip rows that lack either span instead of failing on them.
                    If keyNode Is Nothing OrElse valueNode Is Nothing Then Continue For

                    collected.Append(keyNode.InnerText).Append(":").Append(valueNode.InnerText).Append(vbCrLf)
                Next
            End If
        Catch ex As Exception
            ' Report the failure instead of silently swallowing it; pairs gathered so far are still shown below.
            MsgBox("Failed to parse the sysinfo table: " & ex.Message)
        End Try
        pairs = collected.ToString()

        MsgBox(pairs)

Но, используя этот фрагмент кода, я не могу узнать, в каком месте таблицы (в каком разделе) я нахожусь:

То есть:

Когда я запрашиваю ключ «Имя устройства», возвращаются 4 значения.

pic

Суть в том, что мне нужно как-то запросить именно «Видеоконтроллеры: Имя устройства».

...