Удалить символы новой строки из столбцов вложенных данных - PullRequest
0 голосов
/ 11 апреля 2019

У меня есть фрейм данных со схемой

root
 |-- AppUsers: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Email: string (nullable = true)
 |    |    |-- FirstName: string (nullable = true)
 |    |    |-- LastName: string (nullable = true)
 |    |    |-- UserName: string (nullable = true)
 |-- BusinessLines: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Name: string (nullable = true)
 |-- Campaigns: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- BusinessLineId: integer (nullable = true)
 |    |    |-- Name: string (nullable = true)
 |    |    |-- StartDate: date (nullable = true)
 |    |    |-- EndDate: date (nullable = true)
 |    |    |-- Imported: boolean (nullable = true)
 |    |    |-- IsClosed: string (nullable = true)
 |-- CampaignDomains: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- CampaignId: integer (nullable = true)
 |    |    |-- DomainId: integer (nullable = true)
 |-- CampaignDomainEntityComments: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- CampaignId: integer (nullable = true)
 |    |    |-- DomainId: integer (nullable = true)
 |    |    |-- EntityId: integer (nullable = true)
 |    |    |-- Comment: string (nullable = true)
 |-- CampaignEntities: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- CampaignId: integer (nullable = true)
 |    |    |-- EntityId: integer (nullable = true)
 |    |    |-- ClosedDate: date (nullable = true)
 |    |    |-- ClosedBy: string (nullable = true)
 |-- CampaignDomainEntities: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- DomainId: integer (nullable = true)
 |    |    |-- CampaignId: integer (nullable = true)
 |    |    |-- EntityId: integer (nullable = true)
 |    |    |-- Status: string (nullable = true)
 |    |    |-- ValidationDate: date (nullable = true)
 |    |    |-- ValidatedBy: string (nullable = true)
 |-- Domains: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Code: string (nullable = true)
 |    |    |-- BusinessLineId: integer (nullable = true)
 |    |    |-- Name: string (nullable = true)
 |    |    |-- Order: integer (nullable = true)
 |    |    |-- Enabled: boolean (nullable = true)
 |-- Entities: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Code: string (nullable = true)
 |    |    |-- BasesClient: string (nullable = true)
 |    |    |-- BusinessLineId: integer (nullable = true)
 |    |    |-- Name: string (nullable = true)
 |    |    |-- Pole: string (nullable = true)
 |    |    |-- PoleCode: string (nullable = true)
 |    |    |-- PoleLabel: string (nullable = true)
 |    |    |-- Transactions: string (nullable = true)
 |    |    |-- Enabled: boolean (nullable = true)
 |    |    |-- ELRId: string (nullable = true)
 |    |    |-- ELRDescription: string (nullable = true)
 |    |    |-- UOId: string (nullable = true)
 |    |    |-- UODescription: string (nullable = true)
 |-- Groups: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Code: string (nullable = true)
 |    |    |-- BusinessLine: integer (nullable = true)
 |    |    |-- Name: string (nullable = true)
 |    |    |-- Enabled: boolean (nullable = true)
 |    |    |-- IsCampaign: boolean (nullable = true)
 |-- GroupEntities: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- EntityId: integer (nullable = true)
 |    |    |-- GroupId: integer (nullable = true)
 |-- Indicators: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Code: string (nullable = true)
 |    |    |-- AccessLevel: string (nullable = true)
 |    |    |-- CanBeCopied: boolean (nullable = true)
 |    |    |-- Definition: string (nullable = true)
 |    |    |-- ModeReporting: string (nullable = true)
 |    |    |-- NameEN: string (nullable = true)
 |    |    |-- NameFR: string (nullable = true)
 |    |    |-- Order: integer (nullable = true)
 |    |    |-- Perimeter: string (nullable = true)
 |    |    |-- PeriodTypeEN: string (nullable = true)
 |    |    |-- PeriodTypeFR: string (nullable = true)
 |    |    |-- PeriodTypeId: integer (nullable = true)
 |    |    |-- SubDomainId: integer (nullable = true)
 |    |    |-- Type: string (nullable = true)
 |    |    |-- Enabled: boolean (nullable = true)
 |    |    |-- OversightIndicatorID: string (nullable = true)
 |-- IndicatorEntities: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- EntityId: integer (nullable = true)
 |    |    |-- IndicatorId: integer (nullable = true)
 |-- SubDomains: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Code: string (nullable = true)
 |    |    |-- Comment: string (nullable = true)
 |    |    |-- Name: string (nullable = true)
 |    |    |-- Order: integer (nullable = true)
 |    |    |-- Enabled: boolean (nullable = true)
 |    |    |-- DomainId: integer (nullable = true)
 |-- SubIndicators: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Code: string (nullable = true)
 |    |    |-- IndicatorId: integer (nullable = true)
 |    |    |-- NameEN: string (nullable = true)
 |    |    |-- NameFR: string (nullable = true)
 |    |    |-- Order: integer (nullable = true)
 |    |    |-- Type: string (nullable = true)
 |    |    |-- Unit: string (nullable = true)
 |    |    |-- ValueListNameId: integer (nullable = true)
 |    |    |-- IsMandatory: boolean (nullable = true)
 |    |    |-- IsGDPR: boolean (nullable = true)
 |    |    |-- OversightSubIndicatorID: string (nullable = true)
 |-- ValueLists: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- NameEN: string (nullable = true)
 |    |    |-- NameFR: string (nullable = true)
 |    |    |-- Value: integer (nullable = true)
 |    |    |-- ValueListNameId: integer (nullable = true)
 |-- ValueListNames: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- NameEN: string (nullable = true)
 |    |    |-- NameFR: string (nullable = true)
 |-- Comments: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- Id: integer (nullable = true)
 |    |    |-- Code: string (nullable = true)
 |    |    |-- Definition: string (nullable = true)
 |-- CommentValues: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- CampaignId: integer (nullable = true)
 |    |    |-- CommentId: integer (nullable = true)
 |    |    |-- Value: string (nullable = true)

Печать фрейма данных:

+--------------------------------------+-------------+---------+---------------+----------------------------+-----------------------+--------------------------+--------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------+----------------------------------+---------------------------------------------------------------+-----------------------------------------------------------+-------------------------------------+-----------------+----------------+
|AppUsers                              |BusinessLines|Campaigns|CampaignDomains|CampaignDomainEntityComments|CampaignEntities       |CampaignDomainEntities    |Domains                   |Entities                                                                                                                                                                                  |Groups                             |GroupEntities|Indicators                                                                                                                                                                                                                                                                                                                                                                   |IndicatorEntities|SubDomains                        |SubIndicators                                                  |ValueLists                                                 |ValueListNames                       |Comments         |CommentValues   |
+--------------------------------------+-------------+---------+---------------+----------------------------+-----------------------+--------------------------+--------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------+----------------------------------+---------------------------------------------------------------+-----------------------------------------------------------+-------------------------------------+-----------------+----------------+
|[[1,null,JEROEN,SOMERS,JEROEN.SOMERS]]|[[1,PRIV]]   |null     |[[1,2]]        |[[122,1,9,add comments ]]   |[[1,1,2018-08-24,null]]|[[1,11,1,Draft,null,null]]|[[1,1,1,INCIDENTS,1,true]]|[[1,0071300000,Outil central (FORCE),1,SGPB MONACO GESTION PRIVEE,PRIV,000423,PRIV Monaco,Outil central (FORCE),true,0071300000,SOCIETE GENERALE PRIVATE BANKING (MONACO),20664,PRIV/MON]]|[[1,1,null,SGPB GROUPE,true,false]]|[[1,1]]      |[[18174,D3E_I1,EndUser,false,Rappel : les instructions transposées doivent être validées par la Conformité IBFS avant d'être soumises à la validation du Management de votre entité.,Flow,IBFS 000449 - IBFS Compliance Manual - published on 01/29/2015,IBFS 000449 - Manuel de conformité IBFS - publié le 29/01/2015,1,Global,Monthly,Mensuel,1,440,Complex,true,FCC.1.1]]|[[1,1]]          |[[1,18,null,Key Points,1,true,18]]|[[1,18.1,1,Entity,Entity,111,Text,,null,false,false,FCC.1.1.1]]|[[1,Discretionary management,Discretionary management,1,1]]|[[1,Compliance Item,Compliance Item]]|[[4,Priv-1,null]]|[[13,4,112323 ]]|
+--------------------------------------+-------------+---------+---------------+----------------------------+-----------------------+--------------------------+--------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------+----------------------------------+---------------------------------------------------------------+-----------------------------------------------------------+-------------------------------------+-----------------+----------------+


Поле Definition в столбце Indicators содержит символы новой строки и некоторые нежелательные символы, такие как двойная кавычка ("), запятая (,) и точка с запятой (;).

Я хочу удалить эти нежелательные символы из вложенного поля Indicators.Definition и сохранить структуру такой, какая она есть. Я уже делал это с плоской структурой, но с вложенной структурой это оказалось запутанным.

Для простоты я удалил большинство полей и оставил только те, к которым хочу применить преобразование.

Пример ввода:

{
  "AppUsers": [
    {
      "Id": 1,
      "UserName": "abc.bcd"
    }
  ],
  "Indicators": [
    {
      "Definition": "Rappel ;;;;; , \n"
    }
  ]
}

Ожидаемый результат:

{
  "AppUsers": [
    {
      "Id": 1,
      "UserName": "abc.bcd"
    }
  ],
  "Indicators": [
    {
      "Definition": "Rappel"
    }
  ]
}

Ненужные символы должны быть удалены из столбца Indicators.Definition. Пожалуйста, помогите

1 Ответ

0 голосов
/ 11 апреля 2019

Можно обратиться к нужному столбцу и удалить нежелательные символы с помощью regexp_replace. Ниже приведён пример.

from pyspark.sql.functions import regexp_replace, transform, trim

# Clean Indicators.Definition in place while keeping the nested
# array<struct> layout: every element of the Indicators array is rebuilt with
# only its Definition field replaced; all other fields are left as they are.
#
# Requires Spark 3.1+ (functions.transform with a Python lambda and
# Column.withField were both added in 3.1.0).
#
# The pattern is a Java regex character class, so it takes no /.../g
# delimiters. Line breaks (\r, \n) are removed instead of all whitespace (\s),
# so the words of a definition are not glued together; trim() then drops the
# spaces left at either end.
df = df.withColumn(
    'Indicators',
    transform(
        'Indicators',
        lambda indicator: indicator.withField(
            'Definition',
            trim(
                regexp_replace(
                    indicator.getField('Definition'),
                    r'''[~%&\\;:"',<>?#\r\n]''',
                    '',
                )
            ),
        ),
    ),
)

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...