001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.util;
019
020 import java.io.BufferedReader;
021 import java.io.File;
022 import java.io.FileInputStream;
023 import java.io.FileNotFoundException;
024 import java.io.IOException;
025 import java.io.InputStream;
026 import java.io.InputStreamReader;
027 import java.security.DigestInputStream;
028 import java.security.MessageDigest;
029 import java.util.regex.Matcher;
030 import java.util.regex.Pattern;
031
032 import org.apache.commons.logging.Log;
033 import org.apache.commons.logging.LogFactory;
034 import org.apache.hadoop.io.IOUtils;
035 import org.apache.hadoop.io.MD5Hash;
036 import org.apache.hadoop.util.StringUtils;
037
038 import com.google.common.base.Charsets;
039
040 /**
041 * Static functions for dealing with files of the same format
042 * that the Unix "md5sum" utility writes.
043 */
044 public abstract class MD5FileUtils {
045 private static final Log LOG = LogFactory.getLog(
046 MD5FileUtils.class);
047
048 public static final String MD5_SUFFIX = ".md5";
049 private static final Pattern LINE_REGEX =
050 Pattern.compile("([0-9a-f]{32}) [ \\*](.+)");
051
052 /**
053 * Verify that the previously saved md5 for the given file matches
054 * expectedMd5.
055 * @throws IOException
056 */
057 public static void verifySavedMD5(File dataFile, MD5Hash expectedMD5)
058 throws IOException {
059 MD5Hash storedHash = readStoredMd5ForFile(dataFile);
060 // Check the hash itself
061 if (!expectedMD5.equals(storedHash)) {
062 throw new IOException(
063 "File " + dataFile + " did not match stored MD5 checksum " +
064 " (stored: " + storedHash + ", computed: " + expectedMD5);
065 }
066 }
067
068 /**
069 * Read the md5 file stored alongside the given data file
070 * and match the md5 file content.
071 * @param dataFile the file containing data
072 * @return a matcher with two matched groups
073 * where group(1) is the md5 string and group(2) is the data file path.
074 */
075 private static Matcher readStoredMd5(File md5File) throws IOException {
076 BufferedReader reader =
077 new BufferedReader(new InputStreamReader(new FileInputStream(
078 md5File), Charsets.UTF_8));
079 String md5Line;
080 try {
081 md5Line = reader.readLine();
082 if (md5Line == null) { md5Line = ""; }
083 md5Line = md5Line.trim();
084 } catch (IOException ioe) {
085 throw new IOException("Error reading md5 file at " + md5File, ioe);
086 } finally {
087 IOUtils.cleanup(LOG, reader);
088 }
089
090 Matcher matcher = LINE_REGEX.matcher(md5Line);
091 if (!matcher.matches()) {
092 throw new IOException("Invalid MD5 file " + md5File + ": the content \""
093 + md5Line + "\" does not match the expected pattern.");
094 }
095 return matcher;
096 }
097
098 /**
099 * Read the md5 checksum stored alongside the given data file.
100 * @param dataFile the file containing data
101 * @return the checksum stored in dataFile.md5
102 */
103 public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException {
104 final File md5File = getDigestFileForFile(dataFile);
105 if (!md5File.exists()) {
106 return null;
107 }
108
109 final Matcher matcher = readStoredMd5(md5File);
110 String storedHash = matcher.group(1);
111 File referencedFile = new File(matcher.group(2));
112
113 // Sanity check: Make sure that the file referenced in the .md5 file at
114 // least has the same name as the file we expect
115 if (!referencedFile.getName().equals(dataFile.getName())) {
116 throw new IOException(
117 "MD5 file at " + md5File + " references file named " +
118 referencedFile.getName() + " but we expected it to reference " +
119 dataFile);
120 }
121 return new MD5Hash(storedHash);
122 }
123
124 /**
125 * Read dataFile and compute its MD5 checksum.
126 */
127 public static MD5Hash computeMd5ForFile(File dataFile) throws IOException {
128 InputStream in = new FileInputStream(dataFile);
129 try {
130 MessageDigest digester = MD5Hash.getDigester();
131 DigestInputStream dis = new DigestInputStream(in, digester);
132 IOUtils.copyBytes(dis, new IOUtils.NullOutputStream(), 128*1024);
133
134 return new MD5Hash(digester.digest());
135 } finally {
136 IOUtils.closeStream(in);
137 }
138 }
139
140 /**
141 * Save the ".md5" file that lists the md5sum of another file.
142 * @param dataFile the original file whose md5 was computed
143 * @param digest the computed digest
144 * @throws IOException
145 */
146 public static void saveMD5File(File dataFile, MD5Hash digest)
147 throws IOException {
148 final String digestString = StringUtils.byteToHexString(digest.getDigest());
149 saveMD5File(dataFile, digestString);
150 }
151
152 private static void saveMD5File(File dataFile, String digestString)
153 throws IOException {
154 File md5File = getDigestFileForFile(dataFile);
155 String md5Line = digestString + " *" + dataFile.getName() + "\n";
156
157 AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File);
158 afos.write(md5Line.getBytes(Charsets.UTF_8));
159 afos.close();
160
161 if (LOG.isDebugEnabled()) {
162 LOG.debug("Saved MD5 " + digestString + " to " + md5File);
163 }
164 }
165
166 public static void renameMD5File(File oldDataFile, File newDataFile)
167 throws IOException {
168 final File fromFile = getDigestFileForFile(oldDataFile);
169 if (!fromFile.exists()) {
170 throw new FileNotFoundException(fromFile + " does not exist.");
171 }
172
173 final String digestString = readStoredMd5(fromFile).group(1);
174 saveMD5File(newDataFile, digestString);
175
176 if (!fromFile.delete()) {
177 LOG.warn("deleting " + fromFile.getAbsolutePath() + " FAILED");
178 }
179 }
180
181 /**
182 * @return a reference to the file with .md5 suffix that will
183 * contain the md5 checksum for the given data file.
184 */
185 public static File getDigestFileForFile(File file) {
186 return new File(file.getParentFile(), file.getName() + MD5_SUFFIX);
187 }
188 }