-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathProgram.cs
More file actions
216 lines (191 loc) · 9.56 KB
/
Program.cs
File metadata and controls
216 lines (191 loc) · 9.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
using System;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
namespace EMA.Tools
{
/// <summary>
/// Console tool that lists the large files of a directory and splits the one
/// selected by the user into several smaller files, line by line.
/// </summary>
class Program
{
    private const long OneGigabyte = 1024L * 1024L * 1024L;
    private const long MinLargeFileSize = 2 * OneGigabyte; // files larger than 2 GB are considered large enough for splitting.
    private const long FileSizeLimit = OneGigabyte; // default size of each output file.

    /// <summary>
    /// Entry point: parses the command line, lists the large files found in the
    /// target directory, asks the user to pick one and splits it.
    /// </summary>
    /// <param name="args">
    /// Optional arguments: <c>--help</c>, <c>-min x</c>, <c>-size x</c>, <c>-start x</c>
    /// (x in GB, parsed with the invariant culture) and a directory path.
    /// </param>
    static void Main(string[] args)
    {
        Console.WriteLine("--- Large text file splitter ---");

        string targetDir = null;
        double minLargeFileSizeGb = (double)MinLargeFileSize / OneGigabyte;
        double fileSizeLimitGb = (double)FileSizeLimit / OneGigabyte;
        double startGb = 0.0d;

        for (int i = 0; i < args.Length; i++)
        {
            if (args[i] == "--help")
            {
                Console.WriteLine("Files must be in same directory as app.");
                Console.WriteLine("Add -min x where x is size in GB to set file size detection threshold.");
                Console.WriteLine("Add -size x where x is size in GB to set resulting file size.");
                Console.WriteLine("Add -start x where x is position in GB to set a starting point for processing (default is 0).");
            }
            else if (args[i] == "-min" && i + 1 < args.Length)
            {
                // ++i consumes the value so it is not mistaken for a directory on the next iteration.
                if (TryParseGigabytes(args[++i], true, out double minSize))
                    minLargeFileSizeGb = minSize;
                else
                    Console.WriteLine("Could not parse min argument");
            }
            else if (args[i] == "-size" && i + 1 < args.Length)
            {
                // A zero size is rejected: it would make the split loop divide by zero.
                if (TryParseGigabytes(args[++i], false, out double size))
                    fileSizeLimitGb = size;
                else
                    Console.WriteLine("Could not parse size argument");
            }
            else if (args[i] == "-start" && i + 1 < args.Length)
            {
                if (TryParseGigabytes(args[++i], true, out double startPosition))
                    startGb = startPosition;
                else
                    Console.WriteLine("Could not parse start argument");
            }
            else if (targetDir == null)
            {
                // Any other argument is tried as the directory to scan.
                targetDir = TryResolveDirectory(args[i]);
            }
        }

        // Fall back to the working directory when no valid directory was passed.
        var currentDir = new DirectoryInfo(targetDir ?? Directory.GetCurrentDirectory());

        Console.WriteLine("Target directory is: " + currentDir.FullName);
        Console.WriteLine("Considering large files when size > " + minLargeFileSizeGb.ToString("F3") + " GB to be split into " + fileSizeLimitGb.ToString("F3") + " GB files");
        Console.WriteLine("");

        // Materialize once: the list is displayed and then indexed by the user's selection.
        var minLargeFileBytes = minLargeFileSizeGb * OneGigabyte;
        var timeOrderedFiles = currentDir.GetFiles()
            .Where(x => x.Length > minLargeFileBytes)
            .OrderBy(x => x.LastWriteTime) // order them by last modification
            .ToArray();

        // Quit if no file found:
        if (timeOrderedFiles.Length == 0)
        {
            Console.WriteLine("No files >" + minLargeFileSizeGb.ToString("F3") + " GB were found");
            Console.ReadKey();
            return;
        }

        // Else display large files:
        Console.WriteLine("Found " + timeOrderedFiles.Length + " files >" + minLargeFileSizeGb + " GB");
        for (int index = 0; index < timeOrderedFiles.Length; index++)
        {
            var file = timeOrderedFiles[index];
            Console.WriteLine(index + ". " + file.Name + " - " + file.LastWriteTime + " - " + file.Length + " bytes.");
        }
        Console.WriteLine("");

        FileInfo selectedFile = null;
        while (selectedFile == null)
        {
            Console.Write("Select a file number then press enter: ");

            string answer;
            try
            {
                answer = ReadLineWithCancel();
            }
            catch (InvalidOperationException)
            {
                // Console.ReadKey cannot be used when input is redirected: treat it as a cancel.
                answer = null;
            }
            Console.WriteLine("");

            if (answer == null)
            {
                Console.WriteLine("Quitting...");
                return;
            }

            if (uint.TryParse(answer, NumberStyles.Integer, CultureInfo.InvariantCulture, out uint selection)
                && selection < timeOrderedFiles.Length)
            {
                selectedFile = timeOrderedFiles[selection];
            }
            else
            {
                Console.WriteLine("Invalid selection");
            }
        }

        SplitFile(selectedFile, currentDir.FullName, startGb, fileSizeLimitGb);
    }

    /// <summary>
    /// Parses a size expressed in gigabytes using the invariant culture.
    /// </summary>
    /// <param name="text">Raw command-line value.</param>
    /// <param name="allowZero">Whether 0 is an acceptable value.</param>
    /// <param name="gigabytes">Parsed value, or 0 when parsing fails.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="text"/> is a finite, non-negative number
    /// (and non-zero unless <paramref name="allowZero"/> is set).
    /// </returns>
    private static bool TryParseGigabytes(string text, bool allowZero, out double gigabytes)
    {
        bool valid = double.TryParse(text, NumberStyles.Any, CultureInfo.InvariantCulture, out gigabytes)
            && !double.IsNaN(gigabytes)
            && !double.IsInfinity(gigabytes)
            && gigabytes >= 0
            && (allowZero || gigabytes > 0);

        if (!valid)
            gigabytes = 0;
        return valid;
    }

    /// <summary>
    /// Returns the full path of <paramref name="candidate"/> when it designates an
    /// existing directory, otherwise <c>null</c>.
    /// </summary>
    private static string TryResolveDirectory(string candidate)
    {
        try
        {
            var fullPath = Path.GetFullPath(candidate);
            return Directory.Exists(fullPath) ? fullPath : null;
        }
        catch (Exception ex) when (ex is ArgumentException
            || ex is NotSupportedException
            || ex is IOException
            || ex is System.Security.SecurityException)
        {
            // Best effort: an argument that is not a usable path is simply not a directory.
            return null;
        }
    }

    /// <summary>
    /// Splits <paramref name="source"/> line by line into files named
    /// <c>&lt;name&gt;-reduced-&lt;n&gt;&lt;extension&gt;</c> inside <paramref name="outputDirectory"/>.
    /// </summary>
    /// <remarks>
    /// Sizes are accumulated from <c>line.Length</c> (characters, line terminators excluded),
    /// so positions and output sizes only approximate byte counts.
    /// </remarks>
    /// <param name="source">File to split.</param>
    /// <param name="outputDirectory">Directory receiving the output files.</param>
    /// <param name="startGb">Position in GB from which lines start being copied.</param>
    /// <param name="fileSizeLimitGb">Size in GB after which a new output file is started; must be &gt; 0.</param>
    private static void SplitFile(FileInfo source, string outputDirectory, double startGb, double fileSizeLimitGb)
    {
        if (startGb * OneGigabyte >= source.Length)
        {
            Console.WriteLine("Start point is greater than file size. There is nothing to do...");
            Console.WriteLine("Quitting...");
            return;
        }

        var start = (long)(startGb * OneGigabyte);
        var partSizeLimit = fileSizeLimitGb * OneGigabyte;

        // Display-only estimate, clamped so a tiny part size cannot overflow the conversion.
        var outputFilesNumber = (long)Math.Min((source.Length - start) / partSizeLimit, int.MaxValue) + 1;

        var nameWithoutExtension = source.Name.Substring(0, source.Name.Length - source.Extension.Length);
        var writerBasePath = Path.Combine(outputDirectory, nameWithoutExtension);

        Console.WriteLine("");
        Console.WriteLine("Going to split " + source.Name + " into " + outputFilesNumber + " files.");
        if (startGb > 0)
        {
            Console.WriteLine("Note that file processing starts from " + startGb.ToString("F3") + " GB");
            Console.WriteLine("Please wait while reading the first part of your large file...");
        }

        long readSize = 0;
        long writtenSize = 0; // long: an int would overflow for parts larger than ~2 GB.
        int fileNumber = 1;
        int progressionStep = 0;
        StreamWriter writer = null;

        try
        {
            using (var largeFile = new StreamReader(source.FullName))
            {
                string line;
                while ((line = largeFile.ReadLine()) != null)
                {
                    // Skip lines until the requested start position is reached.
                    readSize += line.Length;
                    if (readSize < start)
                        continue;

                    // Lazily open the next part so no empty trailing file is created.
                    if (writer == null)
                    {
                        var writerPath = writerBasePath + "-reduced-" + fileNumber + source.Extension;
                        writer = new StreamWriter(writerPath, false, largeFile.CurrentEncoding);
                        Console.WriteLine("Creating " + writerPath);
                        Console.Write(fileNumber++ + "/" + outputFilesNumber + " ");
                        Console.Write("=");
                    }

                    writer.WriteLine(line);
                    writtenSize += line.Length;

                    // One '=' per started 10% of the current part (at most one per line written).
                    var progress = writtenSize / partSizeLimit * 100;
                    if (Math.Floor(progress / 10) > progressionStep)
                    {
                        progressionStep++;
                        Console.Write("=");
                    }

                    if (writtenSize > partSizeLimit)
                    {
                        writer.Dispose();
                        writer = null;
                        writtenSize = 0;
                        progressionStep = 0;
                        Console.WriteLine("");
                    }
                }
            }
        }
        finally
        {
            writer?.Dispose();
        }

        // Only reached when the split completed without an exception.
        Console.WriteLine("");
        Console.WriteLine("Done.");
    }

    /// <summary>
    /// Reads a line from the console key by key, echoing typed characters.
    /// </summary>
    /// <returns>The typed text when Enter is pressed, or <c>null</c> when Escape is pressed.</returns>
    private static string ReadLineWithCancel()
    {
        var buffer = new StringBuilder();
        while (true)
        {
            var info = Console.ReadKey(true);
            switch (info.Key)
            {
                case ConsoleKey.Enter:
                    return buffer.ToString();

                case ConsoleKey.Escape:
                    return null;

                case ConsoleKey.Backspace:
                    if (buffer.Length > 0)
                    {
                        buffer.Length--;
                        Console.Write("\b \b"); // erase the last echoed character.
                    }
                    break;

                default:
                    // Ignore keys that do not produce a printable character (arrows, function keys...).
                    if (!char.IsControl(info.KeyChar))
                    {
                        Console.Write(info.KeyChar);
                        buffer.Append(info.KeyChar);
                    }
                    break;
            }
        }
    }
}
}