001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.util;
019
020 import java.io.BufferedReader;
021 import java.io.File;
022 import java.io.FileInputStream;
023 import java.io.IOException;
024 import java.io.InputStream;
025 import java.io.InputStreamReader;
026 import java.security.DigestInputStream;
027 import java.security.MessageDigest;
028 import java.util.regex.Matcher;
029 import java.util.regex.Pattern;
030
031 import org.apache.commons.logging.Log;
032 import org.apache.commons.logging.LogFactory;
033 import org.apache.hadoop.io.IOUtils;
034 import org.apache.hadoop.io.MD5Hash;
035 import org.apache.hadoop.util.StringUtils;
036
037 import com.google.common.base.Charsets;
038
039 /**
040 * Static functions for dealing with files of the same format
041 * that the Unix "md5sum" utility writes.
042 */
043 public abstract class MD5FileUtils {
044 private static final Log LOG = LogFactory.getLog(
045 MD5FileUtils.class);
046
047 public static final String MD5_SUFFIX = ".md5";
048 private static final Pattern LINE_REGEX =
049 Pattern.compile("([0-9a-f]{32}) [ \\*](.+)");
050
051 /**
052 * Verify that the previously saved md5 for the given file matches
053 * expectedMd5.
054 * @throws IOException
055 */
056 public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5)
057 throws IOException {
058 MD5Hash storedHash = readStoredMd5ForFile(dataFile);
059 // Check the hash itself
060 if (!expectedMD5.equals(storedHash)) {
061 throw new IOException(
062 "File " + dataFile + " did not match stored MD5 checksum " +
063 " (stored: " + storedHash + ", computed: " + expectedMD5);
064 }
065 }
066
067 /**
068 * Read the md5 checksum stored alongside the given file, or null
069 * if no md5 is stored.
070 * @param dataFile the file containing data
071 * @return the checksum stored in dataFile.md5
072 */
073 public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException {
074 File md5File = getDigestFileForFile(dataFile);
075
076 String md5Line;
077
078 if (!md5File.exists()) {
079 return null;
080 }
081
082 BufferedReader reader =
083 new BufferedReader(new InputStreamReader(new FileInputStream(
084 md5File), Charsets.UTF_8));
085 try {
086 md5Line = reader.readLine();
087 if (md5Line == null) { md5Line = ""; }
088 md5Line = md5Line.trim();
089 } catch (IOException ioe) {
090 throw new IOException("Error reading md5 file at " + md5File, ioe);
091 } finally {
092 IOUtils.cleanup(LOG, reader);
093 }
094
095 Matcher matcher = LINE_REGEX.matcher(md5Line);
096 if (!matcher.matches()) {
097 throw new IOException("Invalid MD5 file at " + md5File
098 + " (does not match expected pattern)");
099 }
100 String storedHash = matcher.group(1);
101 File referencedFile = new File(matcher.group(2));
102
103 // Sanity check: Make sure that the file referenced in the .md5 file at
104 // least has the same name as the file we expect
105 if (!referencedFile.getName().equals(dataFile.getName())) {
106 throw new IOException(
107 "MD5 file at " + md5File + " references file named " +
108 referencedFile.getName() + " but we expected it to reference " +
109 dataFile);
110 }
111 return new MD5Hash(storedHash);
112 }
113
114 /**
115 * Read dataFile and compute its MD5 checksum.
116 */
117 public static MD5Hash computeMd5ForFile(File dataFile) throws IOException {
118 InputStream in = new FileInputStream(dataFile);
119 try {
120 MessageDigest digester = MD5Hash.getDigester();
121 DigestInputStream dis = new DigestInputStream(in, digester);
122 IOUtils.copyBytes(dis, new IOUtils.NullOutputStream(), 128*1024);
123
124 return new MD5Hash(digester.digest());
125 } finally {
126 IOUtils.closeStream(in);
127 }
128 }
129
130 /**
131 * Save the ".md5" file that lists the md5sum of another file.
132 * @param dataFile the original file whose md5 was computed
133 * @param digest the computed digest
134 * @throws IOException
135 */
136 public static void saveMD5File(File dataFile, MD5Hash digest)
137 throws IOException {
138 File md5File = getDigestFileForFile(dataFile);
139 String digestString = StringUtils.byteToHexString(
140 digest.getDigest());
141 String md5Line = digestString + " *" + dataFile.getName() + "\n";
142
143 AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File);
144 afos.write(md5Line.getBytes(Charsets.UTF_8));
145 afos.close();
146 LOG.debug("Saved MD5 " + digest + " to " + md5File);
147 }
148
149 /**
150 * @return a reference to the file with .md5 suffix that will
151 * contain the md5 checksum for the given data file.
152 */
153 public static File getDigestFileForFile(File file) {
154 return new File(file.getParentFile(), file.getName() + MD5_SUFFIX);
155 }
156 }