-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.js
More file actions
347 lines (342 loc) · 14.3 KB
/
parser.js
File metadata and controls
347 lines (342 loc) · 14.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
let util = require('./util.js')
/**
 * Tests `value` against `pattern` starting from a clean state.
 * RegExp objects with the `g` or `y` flag remember `lastIndex` between
 * `.test()` calls, which makes repeated tests with the same object skip
 * matches; resetting it first makes every test independent.
 * @private
 * @param {RegExp} pattern The pattern to test with.
 * @param {*} value The value to test (coerced to string by `RegExp#test`).
 * @returns {boolean} Whether the pattern matches the value.
 */
function testFresh(pattern, value) {
    pattern.lastIndex = 0;
    return pattern.test(value);
}

/**
 * Converts a negative "from the end" index to its absolute position.
 * Non-negative and non-numeric values are returned unchanged.
 * @private
 * @param {*} index The index to resolve.
 * @param {number} length The length the negative index counts back from.
 * @returns {*} The resolved index.
 */
function resolveIndex(index, length) {
    return typeof index === 'number' && index < 0 ? length + index : index;
}

/**
 * Checks that `index` is an integer inside `[0, length)`.
 * @private
 * @param {*} index The index to check.
 * @param {number} length The exclusive upper bound.
 * @returns {boolean} True when the index addresses an existing position.
 */
function isValidIndex(index, length) {
    return Number.isInteger(index) && index >= 0 && index < length;
}

/**
 * Returns a copy of `list` with the given positions removed.
 * Non-number entries are ignored, negative entries are resolved against
 * `length`, and positions that fall outside the list are skipped.
 * @private
 * @param {Array} list The list to remove from (not modified).
 * @param {Array} indices The positions to remove; may contain negatives.
 * @param {number} length The length negative positions count back from.
 * @returns {Array} A new array without the selected positions.
 */
function withoutIndices(list, indices, length) {
    const result = [...list];
    indices
        .filter((el) => typeof el === 'number')
        .map((el) => resolveIndex(el, length))
        .sort((a, b) => b - a) // highest first so earlier removals do not shift later ones
        .forEach((el) => {
            if (el >= 0 && el < result.length) {
                result.splice(el, 1);
            }
        });
    return result;
}

class Parser {
    /**
     * @namespace
     * @example <caption>Usage</caption>
     * parser = require('./parser.js')
     * util = require('./util.js')
     * ...
     * let myParser = new parser.Parser(myCsvString);
     * myParser.setMeta({"title":"myData","author":"me"}); //optional
     * myParser.setHeaders(0,0); //set the header rows and columns - required
     * myParser.clear(/"/g) //clear quotes from data array - optional. Be careful clearing before setting headers, some header values may not map properly to objects if not in string format, such as "15-10"
     * myParser.setProps("row"); //set row headers to be the property map source - required
     * myParser.propsToArray(/!!/); //splits header values of the selected prop header to arrays for mapping to properties: POPULATION!!15 AND OVER in header becomes [POPULATION, 15 AND OVER] which will become columnheader.POPULATION["15 AND OVER"] = intersecting_value
     * let json = myParser.mapProps();
     * @param {Array[] | string} [data=Array[]] The data to give to the parser. For convenience, a csv string may be passed, which is converted with util.csvArray. The arrays should be of equal length. Any other type leaves the parser with an empty data array.
     * @returns A new Parser object
     * @todo 1. implement error string property
     * @todo 2. wrap all util functions in parser
     */
    constructor(data = [[]]) {
        /** The data array operated on by this Parser. May be referenced for cleaning.
         * @public
         * @type {Array[]}
         * @see util
         */
        this.data = [];
        if (Array.isArray(data)) {
            this.data = data;
        }
        else if (typeof data === 'string') {
            this.data = util.csvArray(data);
        }
        /**@private */
        this.columnHeaders = [];
        /**@private */
        this.rowHeaders = [];
        /**@private */
        this.propArr = 'ROW';
        /**
         * Contains metadata of properties that will be added to the top level JSON object.
         */
        this.metadata = {
            overlapHeader: "None"
        };
    }

    /**
     * Clears unwanted characters from data array or a portion of that array.
     * The work is delegated to util.clear; rowind and colind are forwarded exactly as given.
     * @param {(string | string[] | RegExp )} [find='"'] - The string, array of strings, or regular expression to remove.
     * @param {boolean} [numerify=true] Calls this.numerify() to also convert any strings that look like numbers into numbers.
     * @param {number} [rowind] - The row to operate on. Presumably util.clear treats an omitted value or -1 as "the entire 2D array" - confirm against util.js.
     * @param {number} [colind] - The column to operate on. Presumably util.clear treats an omitted value or -1 as "the entire 2D array" - confirm against util.js.
     */
    clear(find = '"', numerify = true, rowind, colind) {
        this.data = util.clear(this.data, find, rowind, colind);
        if (numerify) {
            this.numerify();
        }
    }

    /**
     * Goes through array and all sub-arrays and converts strings into numbers where it is possible to do so.
     * Replaces this.data with the result of util.numerify.
     */
    numerify() {
        this.data = util.numerify(this.data);
    }

    /**
     * Sets the internal header arrays and removes those rows and columns from the data. The intersecting element will be removed
     * from both header arrays and stored in metadata.overlapHeader.
     *
     * Both indices are validated before anything is changed, so an invalid call leaves the parser untouched.
     * The column index is resolved and bounds-checked against the width of the header row, not the number of rows.
     * @param {integer} [rowIndex=0] The row index of the column headers. Negative will operate from end.
     * @param {integer} [columnIndex=0] The column index of the row headers. Negative will operate from end.
     * @todo implement error string property to log errors in index sizes
     */
    setHeaders(rowIndex = 0, columnIndex = 0) {
        const rowCount = this.data.length;
        const headerRow = resolveIndex(rowIndex, rowCount);
        if (!isValidIndex(headerRow, rowCount)) {
            return;
        }
        const columnCount = this.data[headerRow].length;
        const headerColumn = resolveIndex(columnIndex, columnCount);
        if (!isValidIndex(headerColumn, columnCount)) {
            return;
        }
        this.metadata.overlapHeader = this.data[headerRow][headerColumn];
        // columnHeaders are a row, rowHeaders are a column; each drops the shared corner cell
        this.columnHeaders = this.data[headerRow].filter((el, index) => index !== headerColumn);
        this.rowHeaders = util.getColumn(this.data, headerColumn).filter((el, index) => index !== headerRow);
        this.data = util.chop(this.data, headerRow);
        this.data = util.chopColumn(this.data, headerColumn);
    }

    /**
     * Sets either the row headers or the column headers as the one to be mapped - the other header array will supply the parent object names.
     * @param {string} str A string of either 'ROW' or 'COL' (case-insensitive) - Parser initializes with this set to ROW. Any other value, including non-strings, does nothing.
     */
    setProps(str) {
        if (typeof str !== 'string') {
            return;
        }
        const choice = str.toUpperCase();
        if (choice === 'ROW' || choice === 'COL') {
            this.propArr = choice;
        }
    }

    /**
     * Splits each header of the header array selected by setProps() into an array for mapping to the other header array.
     * Headers that are not strings (for example numbers, or arrays produced by mergeToHeader) are left as they are.
     * @param {RegExp | string} [regex=!!] The regex to split the headers on. A string is compiled with `new RegExp(string, 'g')`, so regex metacharacters in it are significant.
     */
    propsToArray(regex = '!!') {
        const separator = typeof regex === 'string' ? new RegExp(regex, 'g') : regex;
        const splitHeader = (header) => (typeof header === 'string' ? header.split(separator) : header);
        if (this.propArr === 'ROW') {
            this.rowHeaders = this.rowHeaders.map(splitHeader);
        }
        else {
            this.columnHeaders = this.columnHeaders.map(splitHeader);
        }
    }

    /**
     * Merges another row or column into the header array so that each header becomes an array, which causes the higher indexed elements of the array to nest inside the lower indexed elements of the array in the final output json object.
     * The merged row/column is removed from the data, along with its entry in the other header array.
     *
     * An index that does not address an existing row/column leaves the parser unchanged.
     *
     * @param {number} index The index to merge to the header (not counting the header itself in the index). If props are set to 'COL', merges that data row into the column headers; otherwise merges that data column into the row headers. Negative will operate from end.
     */
    mergeToHeader(index) {
        if (this.propArr === 'COL') {
            const rowCount = this.data.length;
            const rowIndex = resolveIndex(index, rowCount);
            if (!isValidIndex(rowIndex, rowCount)) {
                return;
            }
            const mergeRow = this.data[rowIndex];
            for (let i = 0; i < mergeRow.length; i++) {
                const header = this.columnHeaders[i];
                if (Array.isArray(header)) {
                    header.push(mergeRow[i]);
                }
                else {
                    this.columnHeaders[i] = [header, mergeRow[i]];
                }
            }
            this.data.splice(rowIndex, 1);
            this.rowHeaders.splice(rowIndex, 1);
            return;
        }
        // With no data rows, fall back to the header count so the header entry can still be dropped.
        const columnCount = this.data.length > 0 ? this.data[0].length : this.columnHeaders.length;
        const colIndex = resolveIndex(index, columnCount);
        if (!isValidIndex(colIndex, columnCount)) {
            return;
        }
        this.data.forEach((row, i) => {
            const previousHeader = this.rowHeaders[i];
            if (Array.isArray(previousHeader)) {
                previousHeader.push(row[colIndex]);
            }
            else {
                this.rowHeaders[i] = [previousHeader, row[colIndex]];
            }
            row.splice(colIndex, 1);
        });
        this.columnHeaders.splice(colIndex, 1);
    }

    /**
     * Maps data to an object. The object will contain a property for each value not in the selected props array. For each array in the selected props array, a chain of sub-properties will be created. Metadata, if extant, will be added as properties to the mother object.
     * So if you have this data:
     * <table>
     * <tr><th>"headers"</th><th>"head1"</th><th>"head2"</th></tr>
     * <tr><th>"prop1::prop2"</th><td>35</td><td>"val2"</td></tr>
     * <tr><th>"prop1::anotherprop"</th><td>"val3"</td><td>5</td></tr>
     * </table>
     * After setting the props array to "Row" and processing to an array, i.e. <br /><code>[ ["prop1","prop2"], ["prop1","anotherprop"] ]</code><br /> using <code>setProps</code> and <code>propsToArray</code>
     * You will have a JSON like the example below.
     * @example <caption>Output JSON</caption>
     * {
     *   "head1": {
     *     "prop1": {
     *       "prop2":35,
     *       "anotherprop":"val3"
     *     }
     *   },
     *   "head2": {
     *     "prop1": {
     *       "prop2":"val2",
     *       "anotherprop":5
     *     }
     *   }
     * }
     * @returns {Object} A copy of the metadata with one added property per header of the non-selected header array, each built by util.chainMultiple.
     */
    mapProps() {
        const mapByRow = this.propArr === 'ROW';
        const propsArr = mapByRow ? this.rowHeaders : this.columnHeaders;
        // JSON round-trips give simple deep copies so the result shares nothing with parser state
        const parentObj = JSON.parse(JSON.stringify(this.metadata));
        const objsArr = JSON.parse(JSON.stringify(mapByRow ? this.columnHeaders : this.rowHeaders));
        objsArr.forEach((el, index) => {
            // props from rows pair with one data column; props from columns pair with one data row
            const values = mapByRow ? util.getColumn(this.data, index) : this.data[index];
            parentObj[el] = util.chainMultiple(propsArr, values, {}, true);
        });
        return parentObj;
    }

    /**
     * Chops a row from the data array in parser. Also removes those indices from the RowHeaders.
     * Regular expressions are tested from a clean state for every cell, so patterns with the `g` or `y` flag are safe to pass.
     * @param {(number | number[] | RegExp) } find The row index to remove, an array of row indexes to remove, or a regular expression. If a regular expression is passed, all rows that match the regex will be removed. Numbers may be negative to operate from the end.
     * @param {number | string} [regindex=0] The **column** index to search when using regular expressions. Defaults to the first column, index 0. A negative number searches every column. If a string, such as "HEADER" is passed, the row header array will be searched instead.
     * @memberof Parser
     */
    chop(find, regindex = 0) {
        const rowCount = this.data.length;
        let matchrows = [];
        if (find instanceof RegExp) {
            if (typeof regindex === 'string') {
                this.rowHeaders.forEach((header, index) => {
                    if (testFresh(find, header)) {
                        matchrows.push(index);
                    }
                });
            }
            else if (regindex < 0) { // match a row when any of its elements matches
                this.data.forEach((row, index) => {
                    for (let i = 0; i < row.length; i++) {
                        if (testFresh(find, row[i])) {
                            matchrows.push(index);
                            break;
                        }
                    }
                });
            }
            else { // match only the regindex column
                this.data.forEach((row, index) => {
                    if (testFresh(find, row[regindex])) {
                        matchrows.push(index);
                    }
                });
            }
        }
        else if (Array.isArray(find)) {
            matchrows = find;
        }
        else if (typeof find === 'number') {
            matchrows = [find];
        }
        matchrows = matchrows.filter((el) => typeof el === 'number').sort((a, b) => b - a); //remove non-numbers, sort DESC
        // negative indices count back from the number of data rows, matching the rows util.chop is asked to remove
        this.rowHeaders = withoutIndices(this.rowHeaders, matchrows, rowCount);
        this.data = util.chop(this.data, matchrows, regindex, false);
    }

    /**
     * Chops a column from the data array in parser. Also removes those indices from the column headers.
     * Regular expressions are tested from a clean state for every cell, and each matching column is selected only once.
     * @param {(number | number[] | RegExp)} find The column index to remove, an array of column indexes to remove, or a regular expression. If a regular expression is passed _all_ columns that match on the RegIndex row will be removed. Negative numbers will operate from the last column backwards.
     * @param {number | string} [regIndex=0] The **row** index to search when using regular expressions as the find parameter. Defaults to first row, index 0. A negative number searches every row; a row index that does not exist matches nothing. Can also pass in the string "HEADER" to search the header row.
     * @memberof Parser
     */
    chopColumn(find, regIndex = 0) {
        const arr = this.data;
        // measured before the data is chopped; falls back to the header count when there are no rows
        const columnCount = arr.length > 0 && arr[0] != null ? arr[0].length : this.columnHeaders.length;
        let matchcols = [];
        if (find instanceof RegExp) {
            const matched = new Set(); // a column may match in several rows but must be removed once
            const collect = (values) => {
                values.forEach((el, index) => {
                    if (testFresh(find, el)) {
                        matched.add(index);
                    }
                });
            };
            if (typeof regIndex === 'string') {
                collect(this.columnHeaders);
            }
            else if (regIndex < 0) {
                arr.forEach((row) => collect(row));
            }
            else if (Array.isArray(arr[regIndex])) {
                collect(arr[regIndex]);
            }
            matchcols = [...matched];
        }
        else if (Array.isArray(find)) {
            matchcols = find;
        }
        else if (typeof find === 'number') {
            matchcols = [find];
        }
        this.data = util.chopColumn(arr, matchcols);
        this.columnHeaders = withoutIndices(this.columnHeaders, matchcols, columnCount);
    }
}
/**
* The Parser class: holds a 2D data array (or a CSV string converted to one),
* separates its header row and column, and maps the remaining values to a nested object.
*
* @see https://www.quaffingcode.com/census-csv-parser/doc/Parser.html
*
* */
exports.Parser = Parser;
/**
* Utility functions used by the parser for transforming arrays,
* re-exported from ./util.js.
*
* Can be handy on their own.
*
* @see https://www.quaffingcode.com/census-csv-parser/doc/util.html
*/
exports.util = util;