More IO:  Strings, Files, and Web Pages

  1. Other Input Sources
     
    1. Read from a String
      import java.util.Scanner;

      // Demonstrates that a Scanner can read from a String just as it reads
      // from the keyboard: each word of the text is printed on its own line.
      public class MyCode {
        public static void main(String[] args) {
          String fullName = "Douglas Noel Adams";
          // With the default (whitespace) delimiter, every next() call
          // hands back exactly one word of the string.
          for (Scanner words = new Scanner(fullName); words.hasNext(); ) {
            String word = words.next();
            System.out.println(word);
          }
        }
      }

      Another example (with a custom delimiter):

      import java.util.Scanner;

      // Demonstrates a Scanner with a custom delimiter: the comma-separated
      // city names are printed one per line.
      public class MyCode {
        public static void main(String[] args) {
          String cityList = "Rome,Paris,Peoria,London,Tokyo";
          // useDelimiter returns the scanner itself, so the scanner can be
          // created and configured in a single expression.
          Scanner cities = new Scanner(cityList).useDelimiter(",");
          while (cities.hasNext()) {
            String city = cities.next();
            System.out.println(city);
          }
        }
      }
    2. Read from a File
       
      1. Word-at-a-time (excluding whitespace)
        import java.util.Scanner;

        // Reads "SampleFile.txt" one whitespace-delimited word at a time and
        // prints each word on its own line.
        public class MyCode {
          public static void main(String[] args) {
            Scanner scanner = getFileScanner("SampleFile.txt");
            if (scanner == null) {
              // getFileScanner has already printed "File not found: ...",
              // so there is nothing to read and nothing more to report.
              return;
            }
            try {
              while (scanner.hasNext()) {
                System.out.println(scanner.next());
              }
            } finally {
              // Release the underlying file or resource stream.
              scanner.close();
            }
          }

          // Convenient helper method for reading from a file.
          // Returns null if the file is not found (or for any other error),
          // after printing "File not found: <filename>".
          // You are responsible for using this method, but not
          // for writing it (neither on homeworks nor tests)!
          public static Scanner getFileScanner(String filename) {
            try {
              java.io.File file = new java.io.File(filename);
              if (file.isAbsolute()) {
                return new Scanner(file);
              }
              // treat relative files as resources so they work in jar files!
              // Note that we cannot use the current thread's classLoader,
              // since this does not work appropriately on Vista nor on some Macs.
              java.io.InputStream is = getDefiningClass().getResourceAsStream(filename);
              if (is == null) {
                // getResourceAsStream signals a missing resource with null;
                // report it explicitly instead of relying on a later
                // NullPointerException.
                System.out.println("File not found: " + filename);
                return null;
              }
              return new Scanner(new java.io.BufferedReader(
                                 new java.io.InputStreamReader(is)));
            } catch (Exception e) {
              System.out.println("File not found: " + filename);
              return null;
            }
          }

          // Return an instance of the Class class representing the
          // class in which this method is defined.
          // Throws a RuntimeException (wrapping the original failure)
          // if that class cannot be looked up.
          public static Class getDefiningClass() {
            try {
              // Element 0 of a fresh stack trace is the frame that created
              // the exception, i.e. this very method.
              StackTraceElement[] em = new Exception().getStackTrace();
              return Class.forName(em[0].getClassName());
            } catch (Exception e) {
              throw new RuntimeException("Cannot find current class", e);
            }
          }
        }
      2. Line-at-a-time (including whitespace)
        // Prints "SampleFile.txt" one full line at a time, so the whitespace
        // inside each line is kept.  Does nothing further if the file cannot
        // be opened.
        public static void main(String[] args) {
          Scanner scanner = getFileScanner("SampleFile.txt");
          if (scanner == null) {
            // getFileScanner returns null after printing its own error message.
            return;
          }
          try {
            // hasNextLine() is the test that matches nextLine(): hasNext()
            // looks for a *token*, so it would stop early and drop trailing
            // blank or whitespace-only lines.
            while (scanner.hasNextLine()) {
              System.out.println(scanner.nextLine());
            }
          } finally {
            scanner.close();
          }
        }
    3. (And Write to a File)
      import java.io.PrintStream;

      // Writes a single line of text to the file "MyOutput.txt".
      public class MyCode {
        public static void main(String[] args) {
          PrintStream out = getFilePrintStream("MyOutput.txt");
          if (out == null) {
            // getFilePrintStream has already printed "Error opening file ...".
            return;
          }
          try {
            out.println("This will output to the file 'MyOutput.txt'");
          } finally {
            // Always close the stream so the file handle is released.
            out.close();
          }
        }

        // Convenient helper method for writing to a file.
        // Returns null if the file cannot be opened (or for any other error),
        // after printing "Error opening file <filename>".
        // The caller is responsible for closing the returned stream.
        // You are responsible for using this method, but not
        // for writing it (neither on homeworks nor tests)!
        public static PrintStream getFilePrintStream(String filename) {
          try {
            return new PrintStream(new java.io.File(filename));
          } catch (Exception e) {
            System.out.println("Error opening file " + filename);
            return null;
          }
        }
      }
    4. Read from a Web Page
       
      1. As HTML
        import java.util.Scanner; public class MyCode { public static void main(String[] args) { String url = "http://kosbie.net/cmu/fall-08/15-100/handouts/parsely.html"; Scanner scanner = getUrlScanner(url); while (scanner.hasNext()) System.out.println(scanner.nextLine()); } // Convenient helper method for reading from a web page (url). // Returns null if the page cannot be opened (or for any other error). // You are responsible for using this method, but not // for writing it (neither on homeworks or tests)! public static Scanner getUrlScanner(String url) { Scanner scanner = null; try { scanner = new Scanner(new java.net.URL(url).openStream()); } catch (Exception e) { System.out.println("Error opening url " + url); return null; } return scanner; } }
      2. As Plain Text
        import java.util.Scanner;

        // Prints the text of a web page with the HTML markup stripped out,
        // line by line.
        public class MyCode {
          // Reads the handout page by default; if a command-line argument is
          // given, args[0] is used as the url instead.
          public static void main(String[] args) {
            String url = "http://kosbie.net/cmu/fall-08/15-100/handouts/parsely.html";
            if (args != null && args.length > 0) {
              url = args[0];
            }
            Scanner scanner = getUrlTextScanner(url);
            if (scanner == null) {
              // getUrlTextScanner has already printed its error message.
              return;
            }
            try {
              // hasNextLine() is the test that matches nextLine(): hasNext()
              // looks for a *token*, so it would stop early and drop trailing
              // blank or whitespace-only lines.
              while (scanner.hasNextLine()) {
                System.out.println(scanner.nextLine());
              }
            } finally {
              scanner.close();
            }
          }

          // Convenient helper method for reading from a web page (url) as plain text.
          // Returns null if the page cannot be opened (or for any other error).
          // On some pages, especially if they contain XML, the spaces may be elided
          // (sothetextislikethis) -- in that case, try setting the second parameter
          // to " " or "\n", so spaces or newlines are added after each parsed element.
          // You are responsible for using this method, but not
          // for writing it (neither on homeworks nor tests)!
          public static Scanner getUrlTextScanner(String url) {
            return getUrlTextScanner(url, null);
          }

          // Same as getUrlTextScanner(url), but when dataDelimiter is not null
          // it is appended after every run of text the HTML parser reports.
          // The whole page is read and parsed up front; the returned scanner
          // reads from the extracted text held in memory.
          // Returns null, after printing "Error opening text reader for url: <url>",
          // if anything goes wrong.
          public static Scanner getUrlTextScanner(String url, final String dataDelimiter) {
            try {
              // The parser callbacks below run inside the parse(...) call, so
              // an unsynchronized StringBuilder is sufficient.
              final StringBuilder sb = new StringBuilder();
              javax.swing.text.html.HTMLEditorKit.ParserCallback parser =
                new javax.swing.text.html.HTMLEditorKit.ParserCallback() {
                  // Collect each run of text, optionally followed by the delimiter.
                  @Override
                  public void handleText(char[] data, int pos) {
                    if (data != null) {
                      sb.append(data);
                      if (dataDelimiter != null) sb.append(dataDelimiter);
                    }
                  }
                  // Start a new line for simple tags that break the text flow.
                  @Override
                  public void handleSimpleTag(javax.swing.text.html.HTML.Tag tag,
                                    javax.swing.text.MutableAttributeSet a,
                                    int pos) {
                    if (tag.breaksFlow()) sb.append("\n");
                  }
                };
              java.io.InputStreamReader reader = new java.io.InputStreamReader(
                                              new java.net.URL(url).openStream());
              try {
                new javax.swing.text.html.parser.ParserDelegator().parse(reader, parser, true);
              } finally {
                // The page has been consumed (or parsing failed): either way,
                // release the connection's stream.
                reader.close();
              }
              return new Scanner(sb.toString());
            }
            catch (Exception e) {
              System.out.println("Error opening text reader for url: " + url);
              return null;
            }
          }
        }