-->read around 2000 files stored in a folder
-->for each file extract the words using split()
-->store these words in an array
-->print them
Could you all please suggest a way I can improve the program? I now want to do the following tasks:
-->For each word remove punctuation such as .,()'" etc
-->If the word is an HTML tag such as <Title>, don't store it.
Thank you!!
package FirstTry;
import java.io.*;
import java.util.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
 * Prints every whitespace-separated token found in the regular files of one
 * folder, one token per output line.
 */
public class FirstTry2
{
    /** Folder whose files are read; replace with the real location before running. */
    private static final String INPUT_DIR =
            "Path of folder containing around 2000 text files";

    /**
     * Walks the entries of {@link #INPUT_DIR} and prints the tokens of each regular file.
     * A file that cannot be read is reported on standard error and the remaining
     * files are still processed.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        File dir = new File(INPUT_DIR);

        // listFiles() returns null (it does not throw) when the path is not a
        // directory or cannot be read; iterating over null would raise an NPE.
        File[] entries = dir.listFiles();
        if (entries == null)
        {
            System.err.println("Cannot list files in folder: " + dir.getPath());
            return;
        }

        for (File fn : entries)
        {
            // Opening a sub-directory as a stream fails, so only regular files are read.
            if (!fn.isFile())
            {
                continue;
            }
            try
            {
                printWords(fn);
            }
            catch (IOException e)
            {
                // Handled per file so that one unreadable file does not stop the whole run.
                e.printStackTrace();
            }
        }
    }

    /**
     * Reads {@code file} line by line with the platform default charset and prints
     * each token produced by splitting the line on runs of whitespace.
     *
     * @param file the regular file to read
     * @throws IOException if the file cannot be opened or read
     */
    private static void printWords(File file) throws IOException
    {
        // try-with-resources closes both streams even when reading fails part-way.
        try (FileInputStream fstream = new FileInputStream(file);
             BufferedReader br = new BufferedReader(new InputStreamReader(fstream)))
        {
            String strLine;
            while ((strLine = br.readLine()) != null)
            {
                for (String s : strLine.split("\\s+"))
                {
                    System.out.println(s);
                }
            }
        }
    }
}

New Topic/Question
Reply



MultiQuote



|