I have a program that splits a .txt file into 101 files. The file to be divided, File.txt, contains URLs separated by \n. The problem is that the program divides the file into equal-sized parts, so when a part reaches its maximum size it cuts a URL in the middle and starts a new file. How can I split the file so that no part exceeds 1 MB and every URL stays intact?
import java.io.*;
import java.util.Scanner;
public class readfile {
public static int SubfileName;
public static int[] Murl = new int[2000000];
public static int x = 0;
public static long usemem = 0;
public static long Numberofmailto = 0;
static byte[] subfich; //Subfile data (global var)
static long NumberUrl;
static int[] indURL; //Indices de las URLs en "subfich"
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
System.out.println("Enter the file name like url.txt to read but it should be in E:\\url\\");
String name = in.nextLine();
readfile(name);
try {
//now create 100 subfile
GeneraFicheros();
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
// not used
public static void readfile(String filename) {
try {
// file path
leeSubfichero("E:\\url\\" + filename);
creaIndices();
} catch (Exception e) {
}
}
//read danger file
static void leeSubfichero(String nomfich) throws IOException { // read file
File fich = new File(nomfich);
int tam = (int) fich.length(); //Tamaño bytes // size byte
subfich = null;
subfich = new byte[tam];
try (FileInputStream fis = new FileInputStream(fich)) {
NumberUrl = fis.read(subfich);
// find the mailto urls
}
}
static void creaIndices() {
// 1. Count the number of URLs
int n = 0;
int x = 0;
boolean dangerurl = false;
for (int i = 0; i < subfich.length; i++) {
if (subfich[i] == 10) {
n++;
}
}
//2. Store separators position
indURL = null;
indURL = new int[n];
//Murl = new int[n];
int k = 0;
for (int i = 0; i < subfich.length; i++) {
if (subfich[i] == 10) {
indURL[k++] = i;
}
}
}
// create 100 files
public static void GeneraFicheros() throws Exception {
String zero = "00";
RandomAccessFile raf = new RandomAccessFile("E:\\url\\danger.txt", "r");
long numSplits = 100; //divid in 100 subfiles
long sourceSize = raf.length(); // danger.txt file size
long bytesPerSplit = sourceSize / numSplits; // number of bytes each file will have
long remainingBytes = sourceSize % numSplits;
int maxReadBufferSize = 8 * 1024; //8KB
for (int destIx = 1; destIx <= numSplits; destIx++) {
// each literation create a new file like 000
System.out.println("Escrito Subfichero " + zero + destIx + ".txt");
runtime();
if (destIx > 9) {
zero = "0";
}
// write the file with name like 000.txt
BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("E:\\url\\" + zero + destIx + ".txt"));
if (bytesPerSplit > maxReadBufferSize) {
// total number of bytes to read
long numReads = bytesPerSplit / maxReadBufferSize;
// total number of bytes remaining for other files
long numRemainingRead = bytesPerSplit % maxReadBufferSize;
for (int i = 0; i < numReads; i++) {
readWrite(raf, bw, maxReadBufferSize);
}
// if bytes are remaining write the file
if (numRemainingRead > 0) {
readWrite(raf, bw, numRemainingRead);
}
} else {
readWrite(raf, bw, bytesPerSplit);
}
bw.close();
}
// if dividion didn't work extra store here
if (remainingBytes > 0) {
BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split." + (numSplits + 1) + ".txt"));
readWrite(raf, bw, remainingBytes);
bw.close();
}
raf.close();
}
// write 8kb each time in the file
static void readWrite(RandomAccessFile raf, BufferedOutputStream bw, long numBytes) throws IOException {
byte[] buf = new byte[(int) numBytes];
int val = raf.read(buf);
if (val != -1) {
bw.write(buf);
}
}
static long startTime = System.nanoTime();
public static void runtime() {
long endTime = System.nanoTime();
long totalTime = endTime - startTime;
double seconds = (double) totalTime / 1000000000.0;
System.out.println("Toatl seconds" + seconds);
}
}