Я пытаюсь перенести knn (k поиск ближайшего соседа) на дерево kd, которое я написал в Java, на C.
Вывод Java, как и ожидалось:
Nearest to Key: 6.0,5.0,4.0
Key:6.0,5.0,4.0,min distance:0.0
Key:5.0,4.0,3.0,min distance:3.0
Key:7.0,6.0,5.0,min distance:3.0
Key:4.0,3.0,2.0,min distance:12.0
Key:3.0,2.0,1.0,min distance:27.0
Java-код, класс (Это быстрая реализация только для того, чтобы алгоритм заработал до того, как я запустил свой порт):
class kd_tree {
public int DEFAULT_NUMBER_OF_DIMENSIONS = 1;
static int current_number_of_data_points = 0;
static int current_global_median = 0;
kd_tree left;
kd_tree right;
float[] data;
private int k_dimensions;
float distance_to_neighbor;
}
Java-метод knn:
/*===========================================================================
Function knn, knn algorithm using kd-tree & minimumNeighbor function.
Description:
==========================================================*/
public static kd_tree[] knn( kd_tree root
, float[] data_point
, int depth
, int k_dimension
, int number_of_nearest_neighbors) {
kd_tree[] all_nearests = new kd_tree[current_number_of_data_points];
kd_tree[] n_nearests = new kd_tree[current_number_of_data_points];
if (root != null) {
int nearests_counter = 0;
/* now lets traverse the tree inorder & calculate distances from the
query point based on Morris Traversal algorithm that does not
use any stacks or no recursion */
kd_tree cur = root;
kd_tree pre;
while (cur != null) {
if (cur.left == null) {
/* debugging System.out.println(cur.data[0]);
calculate distance */
cur.distance_to_neighbor = n_dimensional_euclidean(data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right; // move to next right node
} else { // has a left subtree
pre = cur.left;
while (pre.right != null && pre.right != cur) { // find rightmost
pre = pre.right;
}
/* Make current as the right child of its inorder
predecessor */
if (pre.right == null) {
pre.right = cur;
cur = cur.left;
} else {
pre.right = null;
// debugging printf("%d ", current->data);
// calculate distance
cur.distance_to_neighbor = n_dimensional_euclidean( data_point, cur.data);
all_nearests[nearests_counter] = cur;
nearests_counter++;
cur = cur.right;
}
}
}//end while
// base cases
// sort from least to greatest
insertion_sort_based_on_distance(all_nearests);
// return on specified number_of_nearest_neighbors
for (int i = 0; i < number_of_nearest_neighbors; i++) {
n_nearests[i]=all_nearests[i];
}
}
return n_nearests;
}
Соответствующие фрагменты кода C:
#include <stdlib.h>
#ifndef KDTREE_H
#define KDTREE_H
/*
* Representation of a kd tree
*/
typedef struct tree_ {
struct tree* left;
struct tree* right;
float* info;
float distance_to_neighbor;
} tree;
// pre-allocated tree nodes array
static tree tree_space[KD_TREE_HEAP_SIZE];
// the knn algorithm will require memory space upto tree size
static tree* processing_space [KD_TREE_HEAP_SIZE];
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, int total_number_of_elements
, tree* n_nearests);
Соответствующая реализация knn, kdtree.c:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include "kdtree.h"
#include "sorting_utility.h"
static int current_number_of_kd_tree_nodes = 0;
static int current_number_of_data_points = 0;
/*===========================================================================
Function knn, knn algorithm using kd-tree.
Description:
==========================================================*/
tree* knn( tree* root
, float data_point[]
, int depth
, const int k_dimensions
, int number_of_nearest_neighbors
, tree* n_nearests)
{
tree all_nearests[current_number_of_kd_tree_nodes];
tree* source_data_end_ptr = NULL;
tree* source_data_start_ptr = NULL;
tree* destination_data_start_ptr = NULL;
tree* destination_data_end_ptr = NULL;
if(NULL != root && NULL != n_nearests)
{
int nearests_counter = 0;
// now lets traverse the tree inorder & calculate distances
// from the query point
tree* cur = root;
tree* pre;
while(NULL != cur)
{
if(NULL == cur->left)
{
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right; // move to next right node
}
else
{
pre = cur->left;
while(pre->right != NULL && pre->right != cur)
{ // find rightmost
pre = pre->right;
}
/* Make current as the right child of its inorder predecessor */
if(pre->right == NULL)
{
pre->right = cur;
cur = cur->left;
}
else
{
pre->right = NULL;
// calculate distance
cur->distance_to_neighbor = n_dimensional_euclidean(data_point, cur->info);
processing_space[nearests_counter] = *cur;
nearests_counter++;
cur = cur->right;
}
}
} // end while
// JUST FOR DEBUGGING START
printf ("***For debugging before sort:\n");
for(int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for(int c = 0; c < k_dimensions; c++)
{
if(NULL != processing_space[i].info)
{
printf("%f,", processing_space[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", processing_space[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy relevant range up current_number_of_kd_tree_nodes before sort,
* in order to avoid sorting beyond range that does not have any data */
source_data_start_ptr = &processing_space[0];
destination_data_start_ptr = &all_nearests[0];
destination_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
while(destination_data_start_ptr <= destination_data_end_ptr)
{
*destination_data_start_ptr = *source_data_start_ptr;
source_data_start_ptr++;
destination_data_start_ptr++;
}
// sort based on distance from query point
quick_sort(all_nearests, 0, current_number_of_kd_tree_nodes - 1);
// JUST FOR DEBUGGING START
printf("***For debugging after sort\n");
for (int i = 0; i < current_number_of_kd_tree_nodes; i++)
{
printf("{");
for (int c = 0; c < k_dimensions; c++)
{
if (NULL != all_nearests[i].info)
{
printf ("%f,", all_nearests[i].info[c]);
}
else
{
break;
}
} // end for
printf("} ");
printf("min_distance=%f\n", all_nearests[i].distance_to_neighbor);
} // end for
// JUST FOR DEBUGGING END
/* copy only the n_nearest & ignore/ do NOT copy any empty tree nodes */
// reuse pointers
destination_data_end_ptr = &n_nearests[number_of_nearest_neighbors - 1];
source_data_end_ptr = &all_nearests[current_number_of_kd_tree_nodes - 1];
source_data_start_ptr = all_nearests;
int counter = 0;
while(counter < number_of_nearest_neighbors && source_data_start_ptr < source_data_end_ptr)
{
// do NOT copy any empty tree nodes
if(source_data_start_ptr != NULL && source_data_start_ptr->info != NULL)
{
/* ATTENTION: i checked with debugger & values for
(distance_to_neighbor,info,left,right were not zeroed or empty */
float distance_to_neighbor = source_data_start_ptr->distance_to_neighbor;
float* info = source_data_start_ptr->info;
tree* left = source_data_start_ptr->left;
tree* right = source_data_start_ptr->right;
n_nearests[counter].distance_to_neighbor = distance_to_neighbor;
n_nearests[counter].info = info;
n_nearests[counter].left = left;
n_nearests[counter].right = right;
counter++;
}
source_data_start_ptr++;
}
}
else
{
printf("Error, knn input parameter error");
}
return n_nearests;
} // end function
Соответствующий фрагмент main.c:
#include<kdtree.h>
int main()
{
const int query_size = 10;
const int k_dimensions = 3;
printf("knn (k nearest neighboor)\n:");
tree* n_nearests[query_size];
printf("%Nearest to Key: {%f,%f,%f}\n", query_size,query_point[0], query_point[1], query_point[2]);
knn(root, query_point, 0, k_dimensions, query_size, KD_TREE_HEAP_SIZE, n_nearests);
// print n nearest neighbors
tree* tree_ptr = &n_nearests[0];
for(int i = 0; i <query_size; i++)
{
if(NULL != tree_ptr->info)
{
printf("Key={");
for(int c=0; c < k_dimensions; c++)
{
printf("%d,", tree_ptr->info[c]);
}
printf("} ");
printf("%f min distance\n", tree_ptr->distance_to_neighbor);
}
}
return 0;
} // end main
Вывод версии C:
knn (k nearest neighboor)
:5 Nearest neighbors to Key: {6.000000,5.000000,4.000000} are:
***For debugging before sort:
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
***For debugging after sort
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{6.000000,5.000000,4.000000,} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{} min_distance=0.000000
{5.000000,4.000000,3.000000,} min_distance=1.000000
{7.000000,6.000000,5.000000,} min_distance=1.000000
{4.000000,3.000000,2.000000,} min_distance=4.000000
{3.000000,2.000000,1.000000,} min_distance=9.000000
{2.000000,1.000000,0.000000,} min_distance=16.000000
{1.000000,0.000000,-1.000000,} min_distance=25.000000
{0.000000,-1.000000,-2.000000,} min_distance=36.000000
{-1.000000,-2.000000,-3.000000,} min_distance=49.000000
{14.000000,13.000000,12.000000,} min_distance=64.000000
{15.000000,14.000000,13.000000,} min_distance=81.000000
{16.000000,15.000000,14.000000,} min_distance=100.000000
**After function return:**
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
Key={0,0,0,} 0.000000 min distance
У меня естьуже протестированные операции вставки, обхода порядка, поиска, удаления и поиска минимального соседа в обеих версиях Java & C, в которых они работают, как и ожидалось.
В версии C функция knn возвращает обнуленные значения после функцииreturn?
Почему значения нулевые в структуре после вызова в главной функции (обратитесь к области вывода, озаглавленной «После возврата функции»?
Надеясь, что кто-то еще заметит что-то очевидное, действительно отчаянно выдергивающее мои волосы.