Я пытаюсь разобраться с базой данных «Каталог жизни» (Catalogue of Life, http://www.catalogueoflife.org/services/index.html) в MySQL. Эта база данных содержит информацию о научных названиях видов — как о действительных (принятых) названиях, так и о недействительных или устаревших синонимах. Вот PDF-файл (http://www.catalogueoflife.org/services/Database_documentation.pdf), объясняющий, как устроена база данных, а ниже приведён код, используемый для создания и заполнения таблиц:
-- Create the working database if it is missing and make it the default schema.
-- IF NOT EXISTS keeps the script re-runnable, in line with the
-- DROP TABLE IF EXISTS statements used for every table of this script.
CREATE DATABASE IF NOT EXISTS icol2011ac;
USE icol2011ac;
-- Save the current session settings into the @OLD_* user variables.
-- The end of this script assigns these variables back to the session settings.
-- If they were never set they would be NULL and those SET statements would
-- fail with error 1231 (a system variable cannot be set to NULL).
-- Only the current values are captured here, no setting is modified.
-- The /*!NNNNN ... */ form is executed only by MySQL servers of at least
-- version N.NN.NN, matching the version guards used by the restoring statements.
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES */;
--
-- Table `author_string`
-- Author citations of taxa and synonyms. The UNIQUE index on `string`
-- allows each citation text to be stored only once.
--
DROP TABLE IF EXISTS `author_string`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `author_string` (
    `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `string` VARCHAR(255) NOT NULL
        COMMENT 'Name of author(s), who described the taxon or published the current combination and the year when appropriate.',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `string` (`string`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 79193
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Author citations of taxa and synonyms';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `hybrid`
-- Links a hybrid taxon to its parent taxa, one row per (taxon, parent) pair.
-- The composite primary key prevents the same pair from being stored twice.
--
DROP TABLE IF EXISTS `hybrid`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `hybrid` (
    `taxon_id` INT(10) UNSIGNED NOT NULL,
    `parent_taxon_id` INT(10) UNSIGNED NOT NULL
        COMMENT 'References two (or three) parent taxon ids',
    PRIMARY KEY (`taxon_id`, `parent_taxon_id`),
    -- Secondary index for lookups that start from the parent taxon.
    INDEX `parent_taxon_id` (`parent_taxon_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Links to parent taxa of hybrids';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `reference`
-- Literature references used for taxa, common names and synonyms.
-- All descriptive columns are optional (nullable).
--
DROP TABLE IF EXISTS `reference`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference` (
    `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `authors` VARCHAR(255) DEFAULT NULL
        COMMENT 'Complete author string',
    -- Stored as text, not as a numeric year, because it may hold several years.
    `year` VARCHAR(25) DEFAULT NULL
        COMMENT 'Year(s) of publication',
    `title` VARCHAR(255) DEFAULT NULL
        COMMENT 'Title of the publication',
    `text` TEXT
        COMMENT 'Additional information pertaining to the publication',
    -- NOTE(review): the table this id points to is not defined in the visible script.
    `uri_id` INT(10) UNSIGNED DEFAULT NULL
        COMMENT 'Link to downloadable version',
    PRIMARY KEY (`id`),
    INDEX `authors` (`authors`),
    INDEX `year` (`year`),
    INDEX `uri_id` (`uri_id`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 60462
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'References used for taxa, common names and synonyms';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `reference_to_synonym`
-- Junction table linking references to synonyms, one row per pair.
--
DROP TABLE IF EXISTS `reference_to_synonym`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference_to_synonym` (
    `reference_id` INT(10) UNSIGNED NOT NULL,
    `synonym_id` INT(10) UNSIGNED NOT NULL,
    PRIMARY KEY (`reference_id`, `synonym_id`),
    -- Secondary index for lookups that start from the synonym.
    INDEX `synonym_id` (`synonym_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Links references to synonyms';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `reference_to_taxon`
-- Junction table linking references to taxa, one row per pair.
--
DROP TABLE IF EXISTS `reference_to_taxon`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference_to_taxon` (
    `reference_id` INT(10) UNSIGNED NOT NULL,
    `taxon_id` INT(10) UNSIGNED NOT NULL,
    PRIMARY KEY (`reference_id`, `taxon_id`),
    -- Secondary index for lookups that start from the taxon.
    INDEX `taxon_id` (`taxon_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Links references to taxa';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `scientific_name_element`
-- Individual name elements from which scientific names are assembled.
-- The UNIQUE index allows each element text to be stored only once.
--
DROP TABLE IF EXISTS `scientific_name_element`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scientific_name_element` (
    `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name_element` VARCHAR(100) NOT NULL
        COMMENT 'Basic element of a scientific name; e.g. the epithet argentatus as used in Larus argentatus argenteus',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `name_element` (`name_element`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 204459
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Individual elements used to generate a scientific name';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `scientific_name_status`
-- Lookup table holding the predetermined list of name statuses.
-- Referenced by the `scientific_name_status_id` columns of `synonym`
-- and `taxon_detail`, which use the same TINYINT(2) UNSIGNED type.
--
DROP TABLE IF EXISTS `scientific_name_status`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scientific_name_status` (
    `id` TINYINT(2) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name_status` VARCHAR(50) NOT NULL
        COMMENT 'Name status of a taxon',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `name_status` (`name_status`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 6
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Predetermined list of name statuses';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `scrutiny`
-- Latest scrutiny date of a taxon together with the specialist who checked it.
-- The date is kept twice: as a parsed DATE and as the original free-text value.
--
DROP TABLE IF EXISTS `scrutiny`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scrutiny` (
    `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `scrutiny_date` DATE DEFAULT NULL
        COMMENT 'Most recent date a taxon name was verified; must parse correctly',
    `original_scrutiny_date` VARCHAR(100) DEFAULT NULL
        COMMENT 'Date as used in the original database; may be incomplete',
    `specialist_id` INT(10) UNSIGNED NOT NULL
        COMMENT 'Link to the specialist who examined the validity of a taxon',
    PRIMARY KEY (`id`),
    -- One row per combination of parsed date, specialist and original date text.
    UNIQUE INDEX `unique` (`scrutiny_date`, `specialist_id`, `original_scrutiny_date`),
    -- NOTE(review): `scrutiny_date` is also the leftmost column of the unique
    -- index above, so this separate index looks redundant. Kept unchanged.
    INDEX `scrutiny_date` (`scrutiny_date`),
    INDEX `specialist_id` (`specialist_id`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 1271
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Latest scrutiny date of a taxon';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `source_database`
-- Descriptive information about each source database.
-- Only `name` is mandatory, every other descriptive column is nullable.
--
DROP TABLE IF EXISTS `source_database`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `source_database` (
    `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(255) NOT NULL
        COMMENT 'Full name of the source database',
    `abbreviated_name` VARCHAR(50) DEFAULT NULL
        COMMENT 'Abbreviated name of the source database',
    `group_name_in_english` VARCHAR(255) DEFAULT NULL
        COMMENT 'Name in English of the group(s) treated in the database',
    `authors_and_editors` VARCHAR(255) DEFAULT NULL
        COMMENT 'Optional author(s) and editor(s) of the source database',
    `organisation` VARCHAR(255) DEFAULT NULL
        COMMENT 'Optional organisation which has compiled or is owning the source database',
    `contact_person` VARCHAR(255) DEFAULT NULL
        COMMENT 'Optional contact person of the source database',
    `version` VARCHAR(25) DEFAULT NULL
        COMMENT 'Optional version number of the source database',
    `release_date` DATE DEFAULT NULL
        COMMENT 'Optional most recent release date of the source database',
    `abstract` TEXT
        COMMENT 'Optional free text field describing the source database',
    -- Disabled column, kept commented out as in the original dump:
    -- `taxonomic_coverage` text
    -- A separate `taxonomic_coverage` table is defined elsewhere in this script.
    PRIMARY KEY (`id`),
    -- Uniqueness is enforced on the pair of full and abbreviated name.
    UNIQUE INDEX `name` (`name`, `abbreviated_name`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 79
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Information about source databases';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `specialist`
-- Specialists who have verified the validity of taxa.
-- The UNIQUE index allows each specialist name to be stored only once.
--
DROP TABLE IF EXISTS `specialist`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `specialist` (
    `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100) NOT NULL,
    PRIMARY KEY (`id`),
    UNIQUE INDEX `name` (`name`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 182
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Specialists who have verified the validity of taxa';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `synonym`
-- One row per synonym, linked to the valid taxon it belongs to, to its
-- author citation (optional) and to its name status.
--
DROP TABLE IF EXISTS `synonym`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `synonym` (
    `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `taxon_id` INT(10) UNSIGNED NOT NULL
        COMMENT 'Link to valid taxon to which the synonym relates',
    `author_string_id` INT(10) UNSIGNED DEFAULT NULL
        COMMENT 'Link to author citation of the synonym',
    `scientific_name_status_id` TINYINT(2) UNSIGNED NOT NULL
        COMMENT 'Link to the name status of the synonym',
    -- NOTE(review): presumably the identifier used by the source database - confirm.
    `original_id` VARCHAR(100) DEFAULT NULL,
    PRIMARY KEY (`id`),
    INDEX `taxon_id` (`taxon_id`),
    INDEX `author_string_id` (`author_string_id`),
    INDEX `scientific_name_status_id` (`scientific_name_status_id`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 7618428
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Synonym details linked to a valid taxon';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `synonym_name_element`
-- Name elements that make up a complete synonym: each row ties one
-- scientific name element to a synonym at a given taxonomic rank.
-- The table has no primary key. The unique index allows at most one element
-- per (rank, synonym) pair.
--
DROP TABLE IF EXISTS `synonym_name_element`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `synonym_name_element` (
    `taxonomic_rank_id` TINYINT(3) UNSIGNED NOT NULL,
    `scientific_name_element_id` INT(10) UNSIGNED NOT NULL,
    `synonym_id` INT(10) UNSIGNED NOT NULL,
    `hybrid_order` TINYINT(1) UNSIGNED DEFAULT NULL
        COMMENT 'Order of parents if synonym is a hybrid; see documentation for details',
    UNIQUE INDEX `unique` (`taxonomic_rank_id`, `synonym_id`),
    -- NOTE(review): `taxonomic_rank_id` is also the leftmost column of the
    -- unique index above, so this separate index looks redundant. Kept unchanged.
    INDEX `taxonomic_rank_id` (`taxonomic_rank_id`),
    INDEX `scientific_name_element_id` (`scientific_name_element_id`),
    INDEX `synonym_id` (`synonym_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Name elements of a complete synonym';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `taxon`
-- One row per taxon with its taxonomic rank and optional source database.
--
DROP TABLE IF EXISTS `taxon`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon` (
    `id` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `taxonomic_rank_id` TINYINT(3) UNSIGNED NOT NULL,
    `source_database_id` INT(10) UNSIGNED DEFAULT NULL,
    -- NOTE(review): presumably the identifier used by the source database - confirm.
    `original_id` VARCHAR(100) DEFAULT NULL,
    PRIMARY KEY (`id`),
    INDEX `taxonomic_rank_id` (`taxonomic_rank_id`),
    INDEX `source_database_id` (`source_database_id`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 7618427
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Scientific name elements and hierarchy of a taxon';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `taxonomic_coverage`
-- Rows relating a source database, a taxon and a sector number.
-- The table has no primary key and no unique index.
--
-- `source_database_id` and `taxon_id` are declared UNSIGNED so that they have
-- exactly the same type as `source_database`.`id` and `taxon`.`id`
-- (both INT(10) UNSIGNED in this script) and as every other *_id column.
-- NOTE(review): presumably these two columns hold ids of those tables - confirm.
--
DROP TABLE IF EXISTS `taxonomic_coverage`;
CREATE TABLE `taxonomic_coverage` (
    `source_database_id` INT(10) UNSIGNED NOT NULL,
    `taxon_id` INT(10) UNSIGNED NOT NULL,
    `sector` TINYINT(2) NOT NULL,
    -- NOTE(review): looks like a 0/1 flag defaulting to 0 - confirm meaning.
    `point_of_attachment` TINYINT(1) NOT NULL DEFAULT '0',
    INDEX `source_database_id` (`source_database_id`),
    INDEX `sector` (`sector`),
    INDEX `taxon_id` (`taxon_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARACTER SET = utf8;
--
-- Table `taxon_detail`
-- Details pertaining to species and infraspecies, at most one row per taxon
-- because `taxon_id` is the primary key.
--
DROP TABLE IF EXISTS `taxon_detail`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon_detail` (
    `taxon_id` INT(10) UNSIGNED NOT NULL,
    `author_string_id` INT(10) UNSIGNED DEFAULT NULL
        COMMENT 'Link to author citation of the taxon',
    `scientific_name_status_id` TINYINT(2) UNSIGNED NOT NULL,
    `scrutiny_id` INT(10) UNSIGNED DEFAULT NULL,
    `additional_data` TEXT
        COMMENT 'Optional free text field describing the taxon',
    PRIMARY KEY (`taxon_id`),
    INDEX `author_string_id` (`author_string_id`),
    -- NOTE(review): the index name is misspelled and does not match the column
    -- it covers. It is kept as is because index names can be referenced
    -- by index hints in existing queries.
    INDEX `taxononomic_status_id` (`scientific_name_status_id`),
    INDEX `scrutiny_id` (`scrutiny_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Details pertaining to species and infraspecies';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `taxon_name_element`
-- At most one row per taxon (primary key `taxon_id`), giving the id of its
-- scientific name element and an optional `parent_id`.
-- NOTE(review): `parent_id` presumably holds the taxon id of the parent taxon
-- in the hierarchy - verify against the database documentation.
--
DROP TABLE IF EXISTS `taxon_name_element`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon_name_element` (
    `taxon_id` INT(10) UNSIGNED NOT NULL,
    `scientific_name_element_id` INT(10) UNSIGNED NOT NULL,
    `parent_id` INT(10) UNSIGNED DEFAULT NULL,
    PRIMARY KEY (`taxon_id`),
    INDEX `scientific_name_element_id` (`scientific_name_element_id`),
    INDEX `parent_id` (`parent_id`)
)
    ENGINE = MyISAM
    DEFAULT CHARACTER SET = utf8;
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `taxonomic_rank`
-- Lookup table holding the predetermined list of taxonomic ranks.
-- The UNIQUE index allows each rank name to be stored only once.
--
DROP TABLE IF EXISTS `taxonomic_rank`;
-- Remember the client character set and use utf8 while the table is defined.
-- These version-guarded statements run only on MySQL 4.1.1 or newer.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxonomic_rank` (
    `id` TINYINT(3) UNSIGNED NOT NULL AUTO_INCREMENT,
    `rank` VARCHAR(50) NOT NULL
        COMMENT 'Taxonomic rank (e.g. family, subspecies)',
    `marker_displayed` VARCHAR(50) DEFAULT NULL,
    -- NOTE(review): looks like a 0/1 flag defaulting to 0 - confirm meaning.
    `standard` TINYINT(1) NOT NULL DEFAULT '0',
    PRIMARY KEY (`id`),
    UNIQUE INDEX `rank` (`rank`)
)
    ENGINE = MyISAM
    AUTO_INCREMENT = 132
    DEFAULT CHARACTER SET = utf8
    COMMENT = 'Predetermined list of taxonomic ranks';
-- Put the previously saved client character set back.
/*!40101 SET character_set_client = @saved_cs_client */;
-- Dump footer: assign the session settings back from the @OLD_* user variables.
-- Each statement is version-guarded and runs only on a server that is at least
-- the version encoded after the exclamation mark.
-- The @OLD_* variables are expected to have been assigned earlier in the same
-- session, as the header of a mysqldump file does. A variable that was never
-- assigned is NULL, and setting a system variable to NULL can fail.
/*!40101 SET SQL_MODE = @OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS = @OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS = @OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT = @OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS = @OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION = @OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES = @OLD_SQL_NOTES */;
-- Dump completed on 2010-12-16 15:47:12
-- Quick fix that allows non-ISO countries and languages to be stored in the
-- ISO tables.
-- NOTE(review): `language`, `country` and `common_name` are not created in the
-- part of the script visible here, so they must already exist when this runs.
-- These statements are not re-runnable: adding `standard` a second time fails.

-- New `standard` column, with 1 as the value for existing and future rows.
ALTER TABLE `language`
    ADD COLUMN `standard` TINYINT(1) NOT NULL DEFAULT '1';
ALTER TABLE `country`
    ADD COLUMN `standard` TINYINT(1) NOT NULL DEFAULT '1';

-- Redefine the code columns as CHAR(3) while keeping their names.
ALTER TABLE `country`
    MODIFY COLUMN `iso` CHAR(3) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL
        COMMENT 'ISO 3166-1-Alpha-2 code';
ALTER TABLE `common_name`
    MODIFY COLUMN `country_iso` CHAR(3) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL
        COMMENT 'Optional country code if usage is restricted to a particular country';
#PART2: importing files into MySQL
-- Each statement bulk-loads one tab-separated, newline-terminated text file
-- into the table of the same name. Without LOCAL the path is read by the
-- server, so the files must be on the server host.
--
-- CHARACTER SET utf8 is given explicitly: without it the server decodes the
-- file with character_set_database, which is the server default for a database
-- created without a character set, and non-ASCII text could be stored garbled.
-- Assumes the dump files are UTF-8 encoded like the utf8 tables they fill
-- - TODO confirm.
--
-- No column list is given, so the file columns must be in table column order.
-- NOTE(review): `region` is not created in the part of the script visible here.
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/author_string.txt' INTO TABLE author_string
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/hybrid.txt' INTO TABLE hybrid
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference.txt' INTO TABLE reference
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference_to_synonym.txt' INTO TABLE reference_to_synonym
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference_to_taxon.txt' INTO TABLE reference_to_taxon
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/region.txt' INTO TABLE region
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scientific_name_element.txt' INTO TABLE scientific_name_element
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scientific_name_status.txt' INTO TABLE scientific_name_status
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scrutiny.txt' INTO TABLE scrutiny
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/source_database.txt' INTO TABLE source_database
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/specialist.txt' INTO TABLE specialist
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/synonym.txt' INTO TABLE synonym
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/synonym_name_element.txt' INTO TABLE synonym_name_element
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon.txt' INTO TABLE taxon
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon_detail.txt' INTO TABLE taxon_detail
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon_name_element.txt' INTO TABLE taxon_name_element
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxonomic_rank.txt' INTO TABLE taxonomic_rank
    CHARACTER SET utf8 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
Кроме того, у меня есть длинная таблица музейных экземпляров с научными названиями (как действительными, так и недействительными), которые мне нужно сопоставить с базой данных Каталога жизни. То есть для каждого названия в моей таблице мне нужно проверить, является ли оно действительным, а если это недействительный синоним — подставить вместо него действительное название.
Названия в моей таблице музейных экземпляров имеют следующий формат:
род, видовой эпитет, подвидовой эпитет, автор / год
например: Bellis perennis - L.
Каждая строка всегда содержит род и видовой эпитет; подвидовой эпитет и автор / год указаны не всегда.
Мне крайне сложно продумать запрос для такого таксономического сопоставления, поскольку действительные названия и синонимы формируются только из комбинации до трёх элементов (элемент названия рода, элемент названия вида и иногда элемент названия подвида) из одной таблицы вместе с информацией об идентификаторе таксона, таксономическом ранге и статусе названия таксона из других таблиц.
Мои музейные экземпляры хранятся в одной большой таблице, где название рода, название вида, а иногда и название подвида и/или автор хранятся в отдельных столбцах.
Буду очень признателен за подсказку, как правильно построить такой запрос! Большое спасибо, что нашли время всё это прочитать!