1   /* FileUtils
2    *
3    * $Id: FileUtils.java 5503 2007-09-28 22:13:00Z gojomo $
4    *
5    * Created on Feb 2, 2004
6    *
7    * Copyright (C) 2004 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.util;
26  
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.channels.FileChannel;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
45  
46  
/**
 * Utility methods for manipulating files and directories.
 *
 * <p>All methods are static; the class cannot be instantiated.
 *
 * @author John Erik Halse
 */
public class FileUtils {
    private static final Logger LOGGER =
        Logger.getLogger(FileUtils.class.getName());

    /**
     * Directory named by the <code>java.io.tmpdir</code> system property,
     * or <code>/tmp</code> when that property is not set.
     */
    public static final File TMPDIR =
        new File(System.getProperty("java.io.tmpdir", "/tmp"));

    /** Overwrite setting used by the copyFile overloads that do not take one. */
    private static final boolean DEFAULT_OVERWRITE = true;

    /** Buffer size, in bytes, used by the stream-based fallback copy. */
    private static final int COPY_BUFFER_SIZE = 4096;

    /**
     * Constructor made private because all methods of this class are static.
     */
    private FileUtils() {
        super();
    }

    /**
     * Copy the named entries of one directory into another directory.
     *
     * @param srcDir Directory the names are resolved against.
     * @param srcFile Names (Strings) of the entries of srcDir to copy. Each
     * entry is copied with {@link #copyFiles(File, File)}, so directories
     * are copied recursively.
     * @param dest Directory to copy into.
     * @return Number of names processed.
     * @throws IOException If any individual copy fails.
     * @throws ClassCastException If srcFile contains a non-String element.
     */
    public static int copyFiles(final File srcDir, Set<?> srcFile,
            final File dest)
    throws IOException {
        int count = 0;
        for (Object entry : srcFile) {
            String name = (String) entry;
            File src = new File(srcDir, name);
            File tgt = new File(dest, name);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Before " + src.getAbsolutePath() + " " +
                    src.exists() + ", " + tgt.getAbsolutePath() + " " +
                    tgt.exists());
            }
            copyFiles(src, tgt);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("After " + src.getAbsolutePath() + " " +
                    src.exists() + ", " + tgt.getAbsolutePath() + " " +
                    tgt.exists());
            }
            count++;
        }
        return count;
    }

    /**
     * Recursively copy all files from one directory to another, without
     * filtering or sorting and overwriting existing targets.
     *
     * @param src file or directory to copy from.
     * @param dest file or directory to copy to.
     * @throws IOException If a copy fails.
     */
    public static void copyFiles(File src, File dest)
    throws IOException {
        copyFiles(src, null, dest, false, true, null);
    }

    /**
     * List the names in a directory in natural sort order.
     *
     * @param src Directory of files to fetch.
     * @param filter Filter to apply to filenames. May be null for no
     * filtering.
     * @return Sorted names of the directory entries, or null if src does not
     * exist, is not a directory, or cannot be listed.
     */
    public static String [] getSortedDirContent(final File src,
            final FilenameFilter filter) {
        if (!src.exists()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath() + " does not exist");
            }
            return null;
        }

        if (!src.isDirectory()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath() + " is not directory.");
            }
            return null;
        }
        // Go through the contents of the directory
        String [] list = (filter == null)? src.list(): src.list(filter);
        if (list != null) {
            Arrays.sort(list);
        }
        return list;
    }

    /**
     * Recursively copy all files from one directory to another.
     *
     * @param src File or directory to copy from.
     * @param filter Filename filter to apply to src. May be null if no
     * filtering wanted.
     * @param dest File or directory to copy to.
     * @param inSortedOrder Copy in order of natural sort.
     * @param overwrite If target file already exits, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @throws IOException If a copy fails.
     */
    public static void copyFiles(final File src, final FilenameFilter filter,
        final File dest, final boolean inSortedOrder, final boolean overwrite)
    throws IOException {
        copyFiles(src, filter, dest, inSortedOrder, overwrite, null);
    }

    /**
     * Recursively copy all files from one directory to another.
     *
     * <p>A src that does not exist is silently skipped.
     *
     * @param src File or directory to copy from.
     * @param filter Filename filter to apply to src. May be null if no
     * filtering wanted. The same filter is applied at every directory level.
     * @param dest File or directory to copy to.
     * @param inSortedOrder Copy in order of natural sort.
     * @param overwrite If target file already exits, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @param exceptions if non-null, add any individual-file IOExceptions
     * to this List rather than throwing, and proceed with the deep copy.
     * Failures to create a destination directory or to list a source
     * directory are reported the same way, and that directory is skipped.
     * @throws IOException If a step fails and exceptions is null.
     */
    public static void copyFiles(final File src, final FilenameFilter filter,
        final File dest, final boolean inSortedOrder, final boolean overwrite,
        List<IOException> exceptions)
    throws IOException {
        if (!src.exists()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath() + " does not exist");
            }
            return;
        }

        if (!src.isDirectory()) {
            try {
                copyFile(src, dest, overwrite);
            } catch (IOException ioe) {
                reportOrThrow(ioe, exceptions);
            }
            return;
        }

        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine(src.getAbsolutePath() + " is a directory.");
        }
        // Create destination directory. The isDirectory() re-check covers
        // the case where something else created it concurrently.
        if (!dest.exists() && !dest.mkdirs() && !dest.isDirectory()) {
            reportOrThrow(new IOException("Failed to create directory " +
                dest.getAbsolutePath()), exceptions);
            return;
        }
        // File.list() returns null (rather than throwing) when the
        // directory cannot be read.
        String [] list = (filter == null)? src.list(): src.list(filter);
        if (list == null) {
            reportOrThrow(new IOException("Failed to list directory " +
                src.getAbsolutePath()), exceptions);
            return;
        }
        if (inSortedOrder) {
            Arrays.sort(list);
        }
        for (String name : list) {
            copyFiles(new File(src, name), filter, new File(dest, name),
                inSortedOrder, overwrite, exceptions);
        }
    }

    /**
     * Copy the src file to the destination, overwriting any existing
     * destination file.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @return True if a copy was performed.
     * @throws FileNotFoundException If src cannot be opened for reading or
     * dest cannot be opened for writing.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest)
    throws FileNotFoundException, IOException {
        return copyFile(src, dest, -1, DEFAULT_OVERWRITE);
    }

    /**
     * Copy the src file to the destination.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param overwrite If target file already exits, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @return True if a copy was performed; false if dest already existed
     * and overwrite was false.
     * @throws FileNotFoundException If src cannot be opened for reading or
     * dest cannot be opened for writing.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
        final boolean overwrite)
    throws FileNotFoundException, IOException {
        return copyFile(src, dest, -1, overwrite);
    }

    /**
     * Copy up to extent bytes of the source file to the destination,
     * overwriting any existing destination file.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param extent Maximum number of bytes to copy. A negative value means
     * the whole file.
     * @return True if a copy was performed.
     * @throws FileNotFoundException If src cannot be opened for reading or
     * dest cannot be opened for writing.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
        long extent)
    throws FileNotFoundException, IOException {
        return copyFile(src, dest, extent, DEFAULT_OVERWRITE);
    }

    /**
     * Copy up to extent bytes of the source file to the destination.
     *
     * <p>The copy is done with file channels. If that fails with an
     * "Invalid argument" IOException (seen on some file copies), a plain
     * stream copy honoring the same extent is tried instead.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param extent Maximum number of bytes to copy. A negative value means
     * the whole file. Fewer bytes are copied if src is shorter than extent.
     * @param overwrite If target file already exits, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @return True if a copy was performed; false if dest already existed
     * and overwrite was false.
     * @throws FileNotFoundException If src cannot be opened for reading or
     * dest cannot be opened for writing.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
        long extent, final boolean overwrite)
    throws FileNotFoundException, IOException {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Copying file " + src + " to " + dest + " extent " +
                extent + " exists " + dest.exists());
        }
        if (dest.exists()) {
            if (!overwrite) {
                // Already in place and we're not to overwrite.  Return.
                return false;
            }
            if (dest.delete()) {
                LOGGER.finer(dest.getAbsolutePath() + " removed before copy.");
            } else {
                LOGGER.finer(dest.getAbsolutePath() +
                    " could not be removed before copy;" +
                    " will attempt to overwrite in place.");
            }
        }
        try {
            channelCopy(src, dest, extent);
        } catch (IOException e) {
            // Add more info to the exception. Preserve old stacktrace.
            // We get 'Invalid argument' on some file copies. See
            // http://intellij.net/forums/thread.jsp?forum=13&thread=63027&message=853123
            // for related issue.
            String message = "Copying " + src.getAbsolutePath() + " to " +
                dest.getAbsolutePath() + " with extent " + extent +
                " got IOE: " + e.getMessage();
            // Constant-first comparison: getMessage() may be null.
            if (!"Invalid argument".equals(e.getMessage())) {
                LOGGER.log(Level.SEVERE, message, e);
                // rethrow
                throw e;
            }
            LOGGER.severe("Failed copy, trying workaround: " + message);
            // channelCopy has already closed its streams, so the fallback
            // is the only writer on dest.
            streamCopy(src, dest, extent);
        }
        return true;
    }

    /**
     * Copy the whole of src to dest using plain streams. Used as a fallback
     * when the channel-based copy fails.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @throws IOException If reading, writing, or closing fails.
     */
    protected static void workaroundCopyFile(final File src,
            final File dest)
    throws IOException {
        streamCopy(src, dest, -1);
    }

    /**
     * Deletes all files and subdirectories under dir, then dir itself.
     *
     * @param dir File or directory to delete.
     * @return true if all deletions were successful. If a deletion fails, the
     *          method stops attempting to delete and returns false.
     */
    public static boolean deleteDir(File dir) {
        if (dir.isDirectory()) {
            String[] children = dir.list();
            // list() returns null when the directory cannot be read; in that
            // case fall through and let delete() report the outcome.
            if (children != null) {
                for (String child : children) {
                    if (!deleteDir(new File(dir, child))) {
                        return false;
                    }
                }
            }
        }
        // The directory is now empty so delete it
        return dir.delete();
    }

    /**
     * Utility method to read an entire file as a String.
     *
     * <p>The file is decoded with the platform default charset and read
     * line by line: every line, including the last, is terminated with a
     * single "\n" in the result regardless of the terminator used in the
     * file.
     *
     * @param file File to read.
     * @return File as String.
     * @throws IOException If the file cannot be opened or read.
     */
    public static String readFileAsString(File file) throws IOException {
        StringBuilder sb = new StringBuilder((int) file.length());
        BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(file)));
        try {
            for (String line = br.readLine(); line != null;
                    line = br.readLine()) {
                sb.append(line);
                sb.append("\n");
            }
        } finally {
            br.close();
        }
        return sb.toString();
    }

    /**
     * Get a list of all files in directory that have passed prefix.
     *
     * @param dir Dir to look in.
     * @param prefix Basename of files to look for. Compare is case insensitive.
     *
     * @return List of files in dir that start w/ passed basename, or null if
     * dir is not a directory or cannot be listed.
     */
    public static File [] getFilesWithPrefix(File dir, final String prefix) {
        // Lower-case the prefix once rather than once per candidate file.
        final String lowerPrefix = prefix.toLowerCase();
        FileFilter prefixFilter = new FileFilter() {
                public boolean accept(File pathname) {
                    return pathname.getName().toLowerCase().
                        startsWith(lowerPrefix);
                }
            };
        return dir.listFiles(prefixFilter);
    }

    /**
     * Get a {@link java.io.FileFilter} that filters files based on a regular
     * expression. The expression must match the whole file name (not the
     * path).
     *
     * @param regexp the regular expression the files must match.
     * @return the newly created filter.
     * @throws java.util.regex.PatternSyntaxException If regexp is invalid.
     */
    public static FileFilter getRegexpFileFilter(String regexp) {
        // Compile once; the filter reuses the pattern for every file.
        final Pattern pattern = Pattern.compile(regexp);
        return new FileFilter() {
            public boolean accept(File pathname) {
                return pattern.matcher(pathname.getName()).matches();
            }
        };
    }

    /**
     * Use for case where files are being added to src.  Repeatedly copies
     * the entries that are in src but not yet in tgt, and breaks off when a
     * pass finds nothing left to copy.
     *
     * @param src Source directory to copy from.
     * @param filter Filter to apply to files to copy. May be null for no
     * filtering.
     * @param tgt Target directory to copy to. Must already exist.
     * @throws IOException If src or tgt cannot be listed, or a copy fails.
     */
    public static void syncDirectories(final File src,
            final FilenameFilter filter, final File tgt)
    throws IOException {
        Set<String> pending;
        do {
            pending = new HashSet<String>(
                Arrays.asList(listOrFail(src, filter)));
            pending.removeAll(Arrays.asList(listOrFail(tgt, filter)));
            if (!pending.isEmpty()) {
                int count = FileUtils.copyFiles(src, pending, tgt);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Copied " + count);
                }
            }
        } while (!pending.isEmpty());
    }

    /**
     * Test file exists and is readable.
     *
     * @param f File to test.
     * @return The passed file, for call chaining.
     * @exception IOException If file does not exist or is not readable.
     */
    public static File isReadable(final File f) throws IOException {
        if (!f.exists()) {
            throw new FileNotFoundException(f.getAbsolutePath() +
                " does not exist.");
        }

        if (!f.canRead()) {
            throw new FileNotFoundException(f.getAbsolutePath() +
                " is not readable.");
        }

        return f;
    }

    /**
     * Test that a file is readable, has the given extension, and starts
     * with the given magic string.
     *
     * @param f File to test.
     * @param uncompressedExtension Extension the lower-cased file name must
     * end with.
     * @param magic String expected at the start of the file. Compared case
     * insensitively, one byte per character.
     * @return True if file is readable, has uncompressed extension,
     * and magic string at file start.
     * @exception IOException If file does not exist or is not readable.
     */
    public static boolean isReadableWithExtensionAndMagic(final File f,
            final String uncompressedExtension, final String magic)
    throws IOException {
        FileUtils.isReadable(f);
        if (!f.getName().toLowerCase().endsWith(uncompressedExtension)) {
            return false;
        }

        final int magicLength = magic.length();
        final byte [] head = new byte[magicLength];
        int filled = 0;
        FileInputStream fis = new FileInputStream(f);
        try {
            // A single read() is allowed to return fewer bytes than asked
            // for, so keep reading until the buffer is full or EOF.
            while (filled < magicLength) {
                int got = fis.read(head, filled, magicLength - filled);
                if (got < 0) {
                    break;
                }
                filled += got;
            }
        } finally {
            fis.close();
        }
        if (filled < magicLength) {
            // File is shorter than the magic string.
            return false;
        }

        StringBuilder beginStr = new StringBuilder(magicLength);
        for (int i = 0; i < magicLength; i++) {
            beginStr.append((char) head[i]);
        }
        return beginStr.toString().equalsIgnoreCase(magic);
    }

    /**
     * Turn path into a File, relative to context (which may be ignored
     * if path is absolute).
     *
     * @param context File context if path is relative
     * @param path String path to make into a File
     * @return File created
     */
    public static File maybeRelative(File context, String path) {
        File f = new File(path);
        if(f.isAbsolute()) {
            return f;
        }
        return new File(context, path);
    }

    /**
     * Add failure to exceptions when a collecting list was supplied,
     * otherwise throw it.
     */
    private static void reportOrThrow(final IOException failure,
            final List<IOException> exceptions)
    throws IOException {
        if (exceptions == null) {
            throw failure;
        }
        exceptions.add(failure);
    }

    /**
     * List dir through filter, turning the null that File.list returns for
     * an unlistable path into an IOException.
     */
    private static String [] listOrFail(final File dir,
            final FilenameFilter filter)
    throws IOException {
        String [] names = dir.list(filter);
        if (names == null) {
            throw new IOException(dir.getAbsolutePath() +
                " is not a directory or could not be listed.");
        }
        return names;
    }

    /**
     * Copy up to extent bytes (whole file if negative) from src to dest
     * using file channels, closing both files before returning.
     */
    private static void channelCopy(final File src, final File dest,
            final long extent)
    throws IOException {
        FileInputStream fis = null;
        FileOutputStream fos = null;
        boolean completed = false;
        try {
            // Get channels
            fis = new FileInputStream(src);
            fos = new FileOutputStream(dest);
            FileChannel fcin = fis.getChannel();
            FileChannel fcout = fos.getChannel();
            long remaining = (extent < 0)? fcin.size(): extent;
            long position = 0;
            // transferTo may move fewer bytes than requested, so repeat
            // until the extent is satisfied or the source is exhausted.
            while (remaining > 0) {
                long moved = fcin.transferTo(position, remaining, fcout);
                if (moved <= 0) {
                    break;
                }
                position += moved;
                remaining -= moved;
            }
            completed = true;
        } finally {
            // Closing a stream also closes its channel. Close failures are
            // only propagated when they would not mask an earlier failure.
            closeAll(completed, fos, fis);
        }
    }

    /**
     * Copy up to extent bytes (whole file if negative) from src to dest
     * using plain streams, closing both files before returning.
     */
    private static void streamCopy(final File src, final File dest,
            final long extent)
    throws IOException {
        FileInputStream from = null;
        FileOutputStream to = null;
        boolean completed = false;
        try {
            from = new FileInputStream(src);
            to = new FileOutputStream(dest);
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            long remaining = (extent < 0)? Long.MAX_VALUE: extent;
            while (remaining > 0) {
                int want = (int) Math.min(buffer.length, remaining);
                int got = from.read(buffer, 0, want);
                if (got < 0) {
                    break;
                }
                to.write(buffer, 0, got);
                remaining -= got;
            }
            completed = true;
        } finally {
            closeAll(completed, to, from);
        }
    }

    /**
     * Close every non-null resource, attempting all of them even if one
     * fails. The first close failure is thrown when propagate is true and
     * logged otherwise; later failures are always just logged.
     */
    private static void closeAll(final boolean propagate,
            final Closeable... resources)
    throws IOException {
        IOException first = null;
        for (Closeable resource : resources) {
            if (resource == null) {
                continue;
            }
            try {
                resource.close();
            } catch (IOException e) {
                if (first == null) {
                    first = e;
                } else {
                    LOGGER.log(Level.WARNING, "Failed close", e);
                }
            }
        }
        if (first == null) {
            return;
        }
        if (propagate) {
            throw first;
        }
        LOGGER.log(Level.WARNING, "Failed close", first);
    }
}