У меня есть приложение Angular 9, которое сильно полагается на данные. Мы загружаем данные из файла Apache Arrow, который относительно эффективен, но я хотел попытаться повысить производительность с помощью веб-воркеров для обработки данных. Однако производительность веб-воркеров не лучше, а даже хуже. Мне любопытно, почему. Я, вероятно, делаю что-то не так. Любые идеи приветствуются.
Моя реализация: у меня есть сервис данных, в котором можно включать и отключать веб-воркеры. Основной метод, который я использую, — "getDashboardMetrics". Он возвращает промис. Я отслеживаю вызовы воркера и сохраняю их в словаре промисов, чтобы можно было просто вернуть промис из метода.
data.service.ts:
/**
 * Facade over DataProcessor. Depending on `enableWebWorkers` (evaluated once,
 * in the constructor) requests are either answered by a DataProcessor living
 * on the calling thread or forwarded to `data.worker.ts` and matched back to
 * their caller through a per-call id.
 */
export class DataService {
  /** Monotonic id used to pair a worker reply with the call that caused it. */
  private workerCallId = 0;
  /** Settle callbacks of worker calls that have not been answered yet, keyed by call id. */
  private workerPromises: {
    [callId: string]: { resolve: (value?: any) => void; reject: (reason?: any) => void };
  } = {};
  private processor: DataProcessor;
  public worker: Worker;
  public enableWebWorkers = false;

  constructor() {
    if (typeof Worker !== "undefined" && this.enableWebWorkers) {
      this.initializeWorker();
    } else {
      // Web Workers are not supported in this environment or disabled
      this.processor = new DataProcessor();
    }
    (window as any).DataService = this;
  }

  /** Creates the worker and wires its reply and error handlers. */
  initializeWorker = () => {
    this.worker = new Worker("./data.worker.ts", { type: "module" });
    this.worker.onmessage = ({ data }) => this.settleWorkerCall(data);
    this.worker.onerror = (event) => {
      // An uncaught worker error cannot be attributed to a single call, so
      // every caller still waiting is released instead of hanging forever.
      const reason = new Error(`Data worker failed: ${event && event.message}`);
      Object.keys(this.workerPromises).forEach((callId) => {
        this.workerPromises[callId].reject(reason);
        delete this.workerPromises[callId];
      });
    };
  };

  /**
   * Returns the unique values of all given columns plus the grouping
   * hierarchy of `columnNames`.
   *
   * @param columnNames columns that form the hierarchy
   * @param nonHierarchicalColumnNames extra columns for which only unique values are needed
   */
  getUniqueValuesAndHierarchy = async (
    columnNames: string[],
    nonHierarchicalColumnNames: string[]
  ): Promise<{ uniqueValues: any; hierarchy: any }> => {
    if (this.worker) {
      return this.callWorker<{ uniqueValues: any; hierarchy: any }>("getUniqueValuesAndHierarchy", {
        columnNames,
        nonHierarchicalColumnNames,
      });
    }
    return this.processor.getUniqueValuesAndHierarchy(columnNames, nonHierarchicalColumnNames);
  };

  /**
   * Returns the grouped dashboard metrics for the given filters.
   *
   * @param filters active dashboard filters
   * @param groupBy column names to group by, outermost first
   * @param workflowMode data set to use; applied to the local processor and sent along to the worker
   */
  getDashboardMetrics = async (
    filters: IFilters,
    groupBy: string[],
    workflowMode: WorkflowModeType
  ): Promise<{ detailMetrics: any }> => {
    if (this.processor && this.processor.getWorkflowMode() !== workflowMode) {
      this.processor.setWorkflowMode(workflowMode);
    }
    if (this.worker) {
      // workflowMode travels with the request: the worker owns a separate
      // DataProcessor that would otherwise stay in its default mode.
      return this.callWorker<{ detailMetrics: any }>("getDashboardMetrics", { filters, groupBy, workflowMode });
    }
    return this.processor.getDashboardMetrics(filters, groupBy);
  };

  /** Returns the rows matching `filters`; always computed on the calling thread. */
  getFilteredVisits = async (filters: IFilters) => {
    return this.getProcessor().getFilteredVisits(filters);
  };

  /** Groups already-filtered rows; always computed on the calling thread. */
  getGroupedMetrics = async (visits: DemoDataRow[], groupBy: string[]) => {
    return this.getProcessor().getGroupedMetrics(visits, groupBy);
  };

  /**
   * Returns the local processor, creating it on first use. In worker mode the
   * constructor does not create one, yet the row-level methods above still
   * need it.
   */
  private getProcessor(): DataProcessor {
    if (!this.processor) this.processor = new DataProcessor();
    return this.processor;
  }

  /**
   * Posts `payload` to the worker under a fresh call id and returns a promise
   * that is settled when the matching reply arrives.
   */
  private callWorker<T>(action: string, payload: object): Promise<T> {
    const callId = this.workerCallId++;
    return new Promise<T>((resolve, reject) => {
      this.workerPromises[callId] = { resolve, reject };
      try {
        this.worker.postMessage({ callId, ...payload, action });
      } catch (err) {
        // postMessage throws when the payload cannot be cloned; do not leave
        // a bookkeeping entry behind for a message that was never sent.
        delete this.workerPromises[callId];
        reject(err);
      }
    });
  }

  /**
   * Handles one worker reply. Accepts the UTF-8 encoded JSON ArrayBuffer the
   * worker script sends as well as an already-structured object.
   */
  private settleWorkerCall(data: any) {
    const message = data instanceof ArrayBuffer ? JSON.parse(new TextDecoder().decode(data)) : data;
    if (!message) return;
    const { callId, results, error } = message;
    const pending = this.workerPromises[callId];
    if (!pending) return;
    delete this.workerPromises[callId];
    if (error !== undefined && error !== null) {
      pending.reject(new Error(String(error)));
    } else if (results === undefined || results === null) {
      // A reply without results would otherwise leave the caller pending forever.
      pending.reject(new Error(`Data worker returned no results for call ${callId}`));
    } else {
      pending.resolve(results);
    }
  }
}
У меня также есть объект DataProcessor, который выполняет выборку и обработку данных. Он создается либо из службы данных (в случае, если веб-воркеры отключены), либо из воркера.
data-processor.ts:
/**
 * Loads an Arrow table together with its companion index JSON and answers
 * filter / group-by queries over its rows. "Packed" columns store numbers
 * that are positions in `arrowIndex`; grouped results are re-keyed with the
 * corresponding names before being returned.
 */
export class DataProcessor {
  /** Cached (possibly still pending) table load; cleared again when the load fails. */
  private arrowTable?: Promise<ArrowTable>;
  private workflowMode: WorkflowModeType = "coding";
  /** Content of the index JSON belonging to the most recently requested table. */
  arrowIndex: string[];

  get indexUrl() {
    return this.workflowMode === "cdi" ? "/assets/data/cdi.index.json" : "/assets/data/data.index.json";
  }

  get arrowUrl() {
    return this.workflowMode === "cdi" ? "/assets/data/cdi.arrow" : "/assets/data/data.arrow";
  }

  getWorkflowMode() {
    return this.workflowMode;
  }

  /** Switches the data set and starts reloading the table for it. */
  setWorkflowMode(workflow: WorkflowModeType) {
    this.workflowMode = workflow;
    this.getArrowTable(true);
  }

  /**
   * Returns the table for the current workflow mode, loading it on first use.
   *
   * @param force discard any cached load and fetch again
   * @returns a promise for the table; it rejects when either file cannot be
   *   fetched, and a failed load is not cached so the next call retries
   */
  getArrowTable = (force = false): Promise<ArrowTable> => {
    if (this.arrowTable && !force) return this.arrowTable;
    const { indexUrl, arrowUrl } = this;
    // Both files are independent, so they are requested concurrently.
    const pending: Promise<ArrowTable> = Promise.all([
      this.fetchOk(indexUrl).then((resp) => resp.json()),
      this.fetchOk(arrowUrl).then((resp) => resp.arrayBuffer()),
    ]).then(([indexJson, arrayBuffer]) => {
      // A load superseded by a forced reload must not overwrite the index
      // that belongs to the newer table.
      if (this.arrowTable === pending) this.arrowIndex = indexJson;
      return ArrowTable.from([new Uint8Array(arrayBuffer)]);
    });
    this.arrowTable = pending;
    pending.catch(() => {
      // Drop the rejected promise from the cache; callers holding `pending`
      // still observe the rejection.
      if (this.arrowTable === pending) this.arrowTable = undefined;
    });
    return pending;
  };

  /** Fetches `url` and rejects on a non-2xx status instead of handing an error page to the parser. */
  private fetchOk(url: string) {
    return fetch(url).then((resp) => {
      if (!resp.ok) throw new Error(`Failed to load ${url}: HTTP ${resp.status}`);
      return resp;
    });
  }

  /**
   * Returns the rows that satisfy every active filter.
   *
   * Facility filters take precedence over region filters; the date range is
   * ignored in "cdi" mode. With no active filter all rows are returned.
   *
   * @param filters active dashboard filters
   * @returns an array of table rows
   */
  getFilteredVisits = async (filters: IFilters): Promise<any> => {
    const arrowTable = await this.getArrowTable();

    // Ids used to be spliced into generated source text, which turned numeric
    // strings into numbers; keep that so "12" still matches a stored 12.
    const toComparable = (id: any) =>
      typeof id === "string" && id.trim() !== "" && !Number.isNaN(Number(id)) ? Number(id) : id;
    const oneOf = (colName: string, items: any[]) => {
      const ids = new Set(items.map((item) => toComparable(item.id)));
      return (row: any) => ids.has(row[colName]);
    };

    const predicates: ((row: any) => boolean)[] = [];
    // if facilities are filtered, no need to look at regions
    if (filters.facilities.length) predicates.push(oneOf("Facility", filters.facilities));
    else if (filters.regions.length) predicates.push(oneOf("Region", filters.regions));
    if (filters.patientTypes.length) predicates.push(oneOf("PatientType", filters.patientTypes));
    if (filters.visitTypes.length) predicates.push(oneOf("VisitType", filters.visitTypes));
    if (filters.financialClasses.length) predicates.push(oneOf("FinancialClass", filters.financialClasses));
    if (filters.totalCharges.length) predicates.push(oneOf("TotalChargesGroup", filters.totalCharges));
    if (filters.dateRange && this.workflowMode !== "cdi") {
      const { startDate, endDate } = filters.dateRange;
      const today = moment(Date.now());
      // "Day" is compared against the number of days each bound lies in the past.
      const startDay: number = startDate ? moment(startDate).diff(today, "days") * -1 : 1;
      const endDay: number = endDate ? moment(endDate).diff(today, "days") * -1 : 0;
      predicates.push((row) => row.Day >= endDay && row.Day <= startDay);
    }
    // Loose equality is deliberate: it keeps matching both 1 and true.
    // eslint-disable-next-line eqeqeq
    if (filters.overdue) predicates.push((row) => row.Overdue == 1);

    const rv: any[] = [];
    for (const row of arrowTable) {
      if (predicates.every((matches) => matches(row))) rv.push(row);
    }
    return rv;
  };

  /**
   * Splits a comma-separated column list (a trailing comma is tolerated) and
   * looks each name up in the table schema.
   */
  private parseColumns(arrowTable: ArrowTable, columnNames: string) {
    return columnNames
      .trim()
      .replace(/,$/, "")
      .split(/\s*,\s*/)
      .map((name) => {
        const field = arrowTable.schema.fields.find((f) => f.name === name);
        const isPacked = !!(field?.metadata && field.metadata.get("packed"));
        const isNumeric = !!field && !isPacked && (field.type instanceof Int || field.type instanceof Float);
        return { name, isPacked, isNumeric };
      });
  }

  /** Adds one to `prop[key]` when `hit` is truthy and makes sure the counter exists otherwise. */
  private static bump(prop: any, key: string, hit: unknown) {
    prop[key] = (prop[key] || 0) + (hit ? 1 : 0);
  }

  /**
   * Builds a nested dictionary of row counts, one nesting level per column.
   *
   * @param columnNames comma-separated column names, outermost first
   * @param filters optional filters; without them every row is counted
   */
  groupBy = async (columnNames: string, filters?: IFilters) => {
    const arrowTable = await this.getArrowTable();
    const filteredVisits = filters ? await this.getFilteredVisits(filters) : arrowTable;
    const colNames = this.parseColumns(arrowTable, columnNames);
    const dict = {};
    for (const row of filteredVisits) {
      let rdict = dict;
      for (const cn of colNames) {
        const val = row.get(cn.name);
        const prop = (rdict[val] = rdict[val] || {});
        prop.count = (prop.count || 0) + 1;
        prop.id = val;
        rdict = prop;
      }
    }
    // unpack names if needed
    if (this.arrowIndex) {
      this.unmapKeys(dict, colNames, this.arrowIndex, 0);
    }
    return dict;
  };

  /**
   * Like `groupBy`, but every group additionally carries the coding-workflow
   * counters (per status, overdue, coded/on-hold today, queries sent today).
   *
   * @param columnNames comma-separated column names, outermost first
   * @param filteredVisits rows to aggregate
   */
  groupByWithStatusCoding = async (columnNames: string, filteredVisits: DemoDataRow[]) => {
    const arrowTable = await this.getArrowTable();
    const colNames = this.parseColumns(arrowTable, columnNames);
    const dict = {};
    for (const row of filteredVisits) {
      // These values depend on the row only, so they are read once per row
      // rather than once per grouping column.
      const rawStatus = row.get("Status");
      const status = this.arrowIndex ? this.arrowIndex[rawStatus] : rawStatus;
      const codedDay = row.get("CodedDay");
      const onHoldDay = row.get("PlacedOnHoldDay");
      const overdue = row.get("Overdue");
      const queriesSentToday = row.get("QueriesSentToday");
      let rdict = dict;
      for (const cn of colNames) {
        const val = row.get(cn.name);
        const prop = (rdict[val] = rdict[val] || {});
        DataProcessor.bump(prop, "CodedToday", codedDay === 0);
        DataProcessor.bump(prop, "OnHoldToday", onHoldDay === 0);
        DataProcessor.bump(prop, "HasQueriesSentToday", queriesSentToday);
        DataProcessor.bump(prop, "Overdue", overdue);
        DataProcessor.bump(prop, "count", true);
        DataProcessor.bump(prop, status, true);
        DataProcessor.bump(prop, `${status}Overdue`, overdue);
        prop.id = val;
        rdict = prop;
      }
    }
    // unpack names if needed
    if (this.arrowIndex) {
      this.unmapKeys(dict, colNames, this.arrowIndex, 0);
    }
    return dict;
  };

  /**
   * Converts the nested dictionary produced by `groupByWithStatusCoding` into
   * an array of plain records. Properties that are not known counters are
   * treated as the groups of the next level.
   *
   * @param groups dictionary for one grouping level
   * @param groupByNames remaining grouping column names, current level first
   */
  formatGroupsForCoding(groups: IHierarchicalGroup, groupByNames: string[]) {
    const first = groupByNames[0];
    const colName = first ? first.replace(/^[A-Z]/, first[0].toLowerCase()) : first;
    return Object.keys(groups).map((g) => {
      const {
        Coded: coded = 0,
        NotReady: notReady = 0,
        Ready: ready = 0,
        OnHold: onHold = 0,
        NotReadyOverdue: notReadyOverdue = 0,
        ReadyOverdue: readyOverdue = 0,
        OnHoldOverdue: onHoldOverdue = 0,
        CodedOverdue: codedOverdue = 0,
        CodedToday: codedToday = 0,
        OnHoldToday: onHoldToday = 0,
        Overdue: overdue = 0,
        HasQueriesSentToday: hasQueriesSentToday = 0,
        count: total = 0,
        id,
        ...secondGroup
      } = groups[g];
      return {
        [colName]: colName === "day" ? Number(g) : g,
        id,
        coded,
        codedToday,
        notReady,
        ready,
        onHold,
        onHoldToday,
        overdue,
        hasQueriesSentToday,
        notReadyOverdue,
        readyOverdue,
        onHoldOverdue,
        codedOverdue,
        total,
        subGroup: groupByNames.length > 1 ? this.formatGroupsForCoding(secondGroup as any, groupByNames.slice(1)) : null,
      };
    });
  }

  /**
   * Like `groupBy`, but every group additionally carries the CDI-workflow
   * counters. A priority score of 80 or more marks a row as high priority.
   *
   * @param columnNames comma-separated column names, outermost first
   * @param filteredVisits rows to aggregate
   */
  groupByWithStatusCdi = async (columnNames: string, filteredVisits: DemoDataRow[]) => {
    const arrowTable = await this.getArrowTable();
    const colNames = this.parseColumns(arrowTable, columnNames);
    const dict = {};
    for (const row of filteredVisits) {
      // Row-level values, read once per row.
      const rawStatus = row.get("Status");
      const status = this.arrowIndex ? this.arrowIndex[rawStatus] : rawStatus;
      const priorityScore = row.get("PriorityScore");
      const dischargeDay = row.get("DischargeDay");
      const cdiQueryDay = row.get("CdiQueryDay");
      const cdiQueryResponseDay = row.get("CdiQueryResponseDay");
      const qualityReviewNeeded = row.get("QualityReviewNeeded");
      const isDischargedWithTasks =
        dischargeDay !== null && cdiQueryDay !== null && cdiQueryResponseDay === null && priorityScore >= 80;
      const reviewNeeded = qualityReviewNeeded && priorityScore >= 80;
      let rdict = dict;
      for (const cn of colNames) {
        const val = row.get(cn.name);
        const prop = (rdict[val] = rdict[val] || {});
        DataProcessor.bump(prop, "count", true);
        DataProcessor.bump(prop, status, true);
        DataProcessor.bump(prop, `${status}NeedsReview`, reviewNeeded);
        DataProcessor.bump(prop, "ReviewNeeded", reviewNeeded);
        DataProcessor.bump(prop, "IsDischargedWithTasks", isDischargedWithTasks);
        DataProcessor.bump(prop, "ReadyPriority", status === "Ready" && priorityScore >= 80);
        prop.id = val;
        rdict = prop;
      }
    }
    // unpack names if needed
    if (this.arrowIndex) {
      this.unmapKeys(dict, colNames, this.arrowIndex, 0);
    }
    return dict;
  };

  /**
   * Converts the nested dictionary produced by `groupByWithStatusCdi` into an
   * array of plain records. Properties that are not known counters are
   * treated as the groups of the next level.
   *
   * @param groups dictionary for one grouping level
   * @param groupByNames remaining grouping column names, current level first
   */
  formatGroupsForCdi(groups: IHierarchicalGroup, groupByNames: string[]) {
    const first = groupByNames[0];
    const colName = first ? first.replace(/^[A-Z]/, first[0].toLowerCase()) : first;
    return Object.keys(groups).map((g) => {
      const {
        Reviewed: reviewed = 0,
        NotReady: notReady = 0,
        Ready: ready = 0,
        PendingResponse: pendingResponse = 0,
        ReadyPriority: readyPriority = 0,
        IsDischargedWithTasks: isDischargedWithTasks = 0,
        ReviewNeeded: qualityReviewNeeded = 0,
        ReviewedNeedsReview: reviewedNeedsReview = 0,
        NotReadyNeedsReview: notReadyNeedsReview = 0,
        ReadyNeedsReview: readyNeedsReview = 0,
        PendingResponseNeedsReview: pendingResponseNeedsReview = 0,
        count: total = 0,
        id,
        ...secondGroup
      } = groups[g];
      return {
        [colName]: colName === "day" ? Number(g) : g,
        id,
        reviewed,
        notReady,
        ready,
        pendingResponse,
        readyPriority,
        isDischargedWithTasks,
        qualityReviewNeeded,
        reviewedNeedsReview,
        notReadyNeedsReview,
        readyNeedsReview,
        pendingResponseNeedsReview,
        total,
        subGroup:
          groupByNames.length > 1 ? this.formatGroupsForCdi(secondGroup as IHierarchicalGroup, groupByNames.slice(1)) : null,
      };
    });
  }

  /**
   * Re-keys, in place, the numeric keys of packed grouping levels with the
   * names found in `map`, recursing one grouping level per column.
   *
   * @param obj dictionary for the current level; non-objects are ignored
   * @param columnNames per-level column descriptors
   * @param map lookup from packed number to name
   * @param level index into `columnNames` for `obj`
   */
  private unmapKeys(obj: unknown, columnNames: { isPacked: boolean }[], map: string[], level: number) {
    // Counter and id properties are visited too; they are primitives and
    // must not be indexed or written to.
    if (!obj || typeof obj !== "object") return;
    const node = obj as Record<string, unknown>;
    const cn = columnNames[level];
    const hasDeeperLevel = level < columnNames.length - 1;
    Object.keys(node).forEach((k) => {
      let key = k;
      if (cn.isPacked && !Number.isNaN(+k)) {
        const newK = (map as any)[k];
        // Keep the entry under its numeric key when the index has no name
        // for it, and never delete it when the name equals the key.
        if (newK !== undefined && newK !== k) {
          node[newK] = node[k];
          delete node[k];
          key = newK;
        }
      }
      if (hasDeeperLevel) this.unmapKeys(node[key], columnNames, map, level + 1);
    });
  }

  /**
   * Returns, per column name, the array of values reported by
   * `getDictionary`; a column without a dictionary yields an empty array.
   */
  getUniqueValues = async (columnNames: string[]) => {
    const returnValue = {};
    for (const columnName of columnNames) {
      const values = await this.getDictionary(columnName);
      returnValue[columnName] = Array.from(values || []);
    }
    return returnValue;
  };

  /**
   * Returns the distinct values of a column. When an index is loaded each
   * value is reported as `{ id, name }` with the name taken from the index;
   * otherwise the column's own dictionary (if any) is returned.
   */
  getDictionary = async (columnName: string) => {
    const arrowTable = await this.getArrowTable();
    const column = arrowTable.getColumn(columnName);
    if (!this.arrowIndex) return column.dictionary?.toArray();
    return uniq(column.toArray()).map((n: number) => ({ id: n, name: this.arrowIndex[n] }));
  };

  /** Combines `getUniqueValues` over all given columns with `groupBy` over `columnNames`. */
  getUniqueValuesAndHierarchy = async (columnNames: string[], nonHierarchicalColumnNames: string[]) => {
    return {
      uniqueValues: await this.getUniqueValues([...columnNames, ...nonHierarchicalColumnNames]),
      hierarchy: await this.groupBy(columnNames.join(", ")),
    };
  };

  /**
   * Counts visits per coding state. Each visit falls into exactly one of
   * coded / onHold / notReady / ready (checked in that order); uncoded visits
   * with `Day` above 5 are additionally counted as overdue.
   */
  getQuickMetrics = (visits: DemoDataRow[]) => {
    const quickMetrics = {
      allAccounts: visits.length,
      coded: 0,
      ready: 0,
      notReady: 0,
      onHold: 0,
      overdueAccounts: 0,
    };
    for (const visit of visits) {
      if (visit.CodedDay !== null) ++quickMetrics.coded;
      else if (visit.PlacedOnHoldDay !== null && visit.CodedDay === null) ++quickMetrics.onHold;
      else if (visit.ReadyToCodeDay === null) ++quickMetrics.notReady;
      else if (visit.ReadyToCodeDay !== null && visit.CodedDay === null) ++quickMetrics.ready;
      if (visit.Day > 5 && visit.CodedDay === null) ++quickMetrics.overdueAccounts;
    }
    return quickMetrics;
  };

  /**
   * Groups `visits` by the given columns using the aggregation and output
   * format of the current workflow mode.
   *
   * @param visits rows to aggregate
   * @param groupBy column names, outermost first; the first letter is upper-cased to get the table column
   */
  getGroupedMetrics = async (visits: DemoDataRow[], groupBy: string[]) => {
    const colNames = groupBy.map((g) => (g ? g.replace(/^[a-z]/, g[0].toUpperCase()) : g)).join(",");
    if (this.workflowMode === "coding") {
      return this.formatGroupsForCoding(await this.groupByWithStatusCoding(colNames, visits), groupBy);
    }
    return this.formatGroupsForCdi(await this.groupByWithStatusCdi(colNames, visits), groupBy);
  };

  /** Filters the table and returns the grouped metrics of the matching rows. */
  getDashboardMetrics = async (filters: IFilters, groupBy: string[]) => {
    const visits = await this.getFilteredVisits(filters);
    return {
      detailMetrics: await this.getGroupedMetrics(visits, groupBy),
    };
  };
}
Код моего воркера, data.worker.ts:
import { DataProcessor } from "./data-processor";
// The worker owns its own DataProcessor, separate from any instance on the page.
const processor = new DataProcessor();

/**
 * Handles one request from the page. The reply is always
 * `{ callId, results }` on success or `{ callId, error }` on failure, encoded
 * as UTF-8 JSON and transferred as an ArrayBuffer.
 *
 * NOTE(review): the reply is serialized with JSON.stringify here and parsed
 * again on the page; whether that round trip is cheaper than posting the
 * object directly should be measured rather than assumed.
 */
self.onmessage = async ({ data }) => {
  const { filters, groupBy, callId, action, nonHierarchicalColumnNames, columnNames, workflowMode } = data;
  let reply;
  try {
    // Apply the requested data set, when the caller sends one, before querying.
    if (workflowMode && processor.getWorkflowMode() !== workflowMode) {
      processor.setWorkflowMode(workflowMode);
    }
    let results;
    if (action === "getDashboardMetrics") {
      results = await processor.getDashboardMetrics(filters, groupBy);
    } else if (action === "getUniqueValuesAndHierarchy") {
      results = await processor.getUniqueValuesAndHierarchy(columnNames, nonHierarchicalColumnNames);
    } else {
      throw new Error(`Unknown worker action: ${action}`);
    }
    reply = { results, callId };
  } catch (err) {
    // Always answer, so the page can settle the call instead of waiting forever.
    reply = { callId, error: err instanceof Error ? err.message : String(err) };
  }
  // Converting to arraybuffer to transfer
  const buffer = new TextEncoder().encode(JSON.stringify(reply)).buffer;
  self.postMessage(buffer, [buffer]);
};
Там много асинхронного (async) кода, и мне кажется, что проблема может быть в нём. Я попробовал использовать передаваемые объекты (transferable objects), чтобы посмотреть, улучшит ли это производительность, но это почти ничего не дало. Есть идеи?