//**************************************
// Name: Html tag stripper
// Description: reads in an HTML file, removes the tags and sends the
//              remaining text to an output text file
// By: John Hunsley (from psc cd)
//
// Inputs: an HTML file ("text.txt" by default, or the first command-line argument)
//
// Returns: a text file ("output.txt" by default, or the second command-line argument)
//
// Assumes: only works on JSDK 1.4 and above
//
// Side Effects: overwrites the output file; shows a confirmation dialog
//               when a graphical display is available
//**************************************

import java.awt.GraphicsEnvironment;
import java.io.*;
import java.util.*;
import java.util.regex.Pattern;
import javax.swing.JOptionPane;

/**
 * Reads an HTML file, removes everything that looks like a tag
 * ({@code <...>}), and writes the remaining text to an output text file.
 *
 * <p>Limitations: tag removal is purely textual. The text between
 * {@code <script>} or {@code <style>} tags is kept, character entities such as
 * {@code &amp;} are not decoded, and a {@code <} that is never followed by a
 * {@code >} is left in place.
 *
 * @author John Hunsley
 * @version 1.0 25/02/03
 */
class HtmlStripper {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "text.txt";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "output.txt";

    /**
     * Matches one tag: a '<', any run of characters other than '>', then '>'.
     * The negated character class also matches line terminators, so a tag
     * that is split across several lines is still removed.
     */
    private static final Pattern TAG = Pattern.compile("<[^>]*>");

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the input file path and
     *             {@code args[1]} is the output file path. Missing entries
     *             fall back to {@code text.txt} and {@code output.txt}.
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        String fileAsString;
        try {
            fileAsString = readFile(inputPath);
        } catch (IOException e) {
            // Stop here: without input there is nothing meaningful to write.
            System.err.println("IO Exception occured while reading "
                    + inputPath + ": " + e.getMessage());
            return;
        }
        System.out.println("file saved as string");

        String stripped = stripTags(fileAsString);

        try {
            writeFile(outputPath, stripped);
        } catch (IOException e) {
            System.err.println("IO Exception occured while writing "
                    + outputPath + ": " + e.getMessage());
            return;
        }
        System.out.println("String written to output text file");

        // showMessageDialog throws HeadlessException when no display is
        // available, so the dialog is only shown in a graphical environment.
        if (!GraphicsEnvironment.isHeadless()) {
            JOptionPane.showMessageDialog(null, "String written to output text file");
        }
    }

    /**
     * Reads a whole text file into a string, keeping the line structure.
     * Lines are joined with the platform line separator; no separator is
     * added after the last line.
     *
     * @param path path of the file to read
     * @return the file contents
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFile(String path) throws IOException {
        // StringBuffer and try/finally are used (rather than newer
        // constructs) because the file header targets JSDK 1.4.
        StringBuffer contents = new StringBuffer();
        String lineSeparator = System.getProperty("line.separator");
        BufferedReader input = new BufferedReader(new FileReader(path));
        try {
            boolean firstLine = true;
            String line;
            while ((line = input.readLine()) != null) {
                // readLine() drops the terminator; put one back between lines
                // so words on adjacent lines do not run together.
                if (!firstLine) {
                    contents.append(lineSeparator);
                }
                contents.append(line);
                firstLine = false;
            }
        } finally {
            input.close();
        }
        return contents.toString();
    }

    /**
     * Removes every substring matched by {@link #TAG} from the given text.
     *
     * @param html text that may contain tags
     * @return the text with all matched tags removed
     */
    private static String stripTags(String html) {
        return TAG.matcher(html).replaceAll("");
    }

    /**
     * Writes the text followed by a line terminator to the given file,
     * replacing any existing content.
     *
     * @param path path of the file to write
     * @param text text to write
     * @throws IOException if the file cannot be opened or the write fails
     */
    private static void writeFile(String path, String text) throws IOException {
        PrintWriter output = new PrintWriter(new FileWriter(path));
        try {
            output.println(text);
            // PrintWriter never throws on write errors; checkError() flushes
            // the stream and reports whether one occurred.
            if (output.checkError()) {
                throw new IOException("write failed");
            }
        } finally {
            output.close();
        }
    }
}