The essence of my method is to compute a hash for each media item, store it in a Solr index, and add a pipeline processor that checks for duplicates whenever a content editor uploads a media file.
For the hash, I use System.Security.Cryptography.MD5. First, I created a method that computes an MD5-based hash of a Stream.
/// <summary>
/// Computes a Base64-encoded MD5 fingerprint of the content of a stream.
/// </summary>
/// <remarks>
/// The MD5 digest of the stream is hashed a second time before it is encoded.
/// The second pass adds no value on its own, but it is kept on purpose so the
/// returned values stay identical to the ones this method has always produced
/// (for example, values that were already written to a search index).
/// MD5 serves here only as a content fingerprint, not as a security measure.
/// </remarks>
/// <param name="stream">Stream to hash; it is read from its current position to its end.</param>
/// <returns>Base64 text of the resulting 16-byte digest (24 characters).</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public static string StreamToMD5(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    // MD5 is IDisposable, so release it deterministically instead of leaking it.
    using (var md5 = MD5.Create())
    {
        byte[] contentDigest = md5.ComputeHash(stream);

        // Second pass kept for compatibility with previously computed values.
        byte[] fingerprint = md5.ComputeHash(contentDigest);
        return Convert.ToBase64String(fingerprint);
    }
}
As you can see, I convert the hash to Base64. This makes it simple to store the value in Solr as a plain string.
After that, I implemented a computed field that will keep the hash value.
/// <summary>
/// Computed index field that stores the content hash of image media items.
/// Items that are not based on the unversioned or versioned image template,
/// and items without a media stream, produce no value.
/// </summary>
public class MediaHashComputedField : IComputedIndexField
{
    /// <summary>Name of the index field (part of the computed field contract).</summary>
    public string FieldName { get; set; }

    /// <summary>Return type of the index field (part of the computed field contract).</summary>
    public string ReturnType { get; set; }

    /// <summary>
    /// Calculates the hash for the item behind <paramref name="indexable"/>.
    /// </summary>
    /// <param name="indexable">The indexable being processed.</param>
    /// <returns>
    /// The hash text from <c>MD5Helper.StreamToMD5</c>, or null when the indexable is
    /// not an item, the item is not an image, or the item has no media stream.
    /// </returns>
    public object ComputeFieldValue(IIndexable indexable)
    {
        var source = (Item)(indexable as SitecoreIndexableItem);
        if (source == null)
            return null;

        bool isImage = source.HasBaseTemplate(Sitecore.TemplateIDs.UnversionedImage)
            || source.HasBaseTemplate(Sitecore.TemplateIDs.VersionedImage);
        if (!isImage)
            return null;

        using (Stream content = new MediaItem(source).GetMediaStream())
        {
            return content == null ? null : MD5Helper.StreamToMD5(content);
        }
    }
}
Only Unversioned and Versioned images will have this hash in Solr. You can extend it for other media file types.
Connecting the computed field through Sitecore configuration.
<!-- Sitecore configuration patch: adds the "mediahash" computed field to the default Solr index configuration. -->
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/"
xmlns:env="http://www.sitecore.net/xmlconfig/env/">
<sitecore>
<contentSearch>
<indexConfigurations>
<defaultSolrIndexConfiguration>
<documentOptions>
<fields hint="raw:AddComputedIndexField">
<!-- The element text is the assembly-qualified type of the computed field class. -->
<!-- NOTE(review): the namespace segment "ComputedFileds" must match the namespace declared in the assembly; verify the spelling there. -->
<field fieldName="mediahash" returnType="string">DTCM.Feature.DuplicateMedia.ComputedFileds.MediaHashComputedField, DTCM.Feature.DuplicateMedia</field>
</fields>
</documentOptions>
</defaultSolrIndexConfiguration>
</indexConfigurations>
</contentSearch>
</sitecore>
</configuration>
The second step is creating a searcher that will have a method for searching images by hash.
/// <summary>
/// Looks up media search documents by their content hash.
/// </summary>
public interface IMediaSearcher
{
/// <summary>
/// Returns the media search documents associated with the given hash.
/// </summary>
/// <param name="hash">
/// Hash value to search for. The callers in this article pass the text
/// produced by <c>MD5Helper.StreamToMD5</c>.
/// </param>
/// <returns>The matching <see cref="MediaSearchResultItem"/> documents.</returns>
IEnumerable<MediaSearchResultItem> GetMediaByHash(string hash);
}
/// <summary>
/// Searches a content search index for media documents that carry a given content hash.
/// </summary>
[Service(typeof(IMediaSearcher))]
public class MediaSearcher : IMediaSearcher
{
    /// <summary>Name of the content search index that is queried.</summary>
    protected string IndexName => "sitecore_master_index";

    /// <summary>
    /// ID that must be present in a document's <c>Paths</c> for it to be returned;
    /// named after the media library.
    /// </summary>
    protected ID mediaLibrary => new ID("{3D6658D8-A0BF-4E75-B3E2-D050FABCF4E1}");

    /// <summary>
    /// Finds the indexed documents below <see cref="mediaLibrary"/> whose
    /// <c>MediaHash</c> equals <paramref name="hash"/>.
    /// </summary>
    /// <param name="hash">Hash value to look for.</param>
    /// <returns>
    /// The matching documents, fully materialized. The sequence is empty (never null)
    /// when <paramref name="hash"/> is null or whitespace, or when nothing matches.
    /// </returns>
    public IEnumerable<MediaSearchResultItem> GetMediaByHash(string hash)
    {
        // Callers chain LINQ operators directly on the result, so never hand back null.
        if (string.IsNullOrWhiteSpace(hash))
        {
            return Enumerable.Empty<MediaSearchResultItem>();
        }

        var index = ContentSearchManager.GetIndex(this.IndexName);

        // Read the ID once so the query captures a plain value instead of a property getter.
        ID mediaLibraryId = this.mediaLibrary;

        using (var context = index.CreateSearchContext())
        {
            var results = context.GetQueryable<MediaSearchResultItem>()
                .Where(i => i.Paths.Contains(mediaLibraryId) && i.MediaHash == hash)
                .GetResults();

            if (results == null)
            {
                return Enumerable.Empty<MediaSearchResultItem>();
            }

            // Materialize before the search context is disposed; a lazy projection
            // would otherwise be enumerated by the caller after disposal.
            return results.Hits.Select(hit => hit.Document).ToList();
        }
    }
}
I use this searcher in a custom upload processor.
The custom upload processor looks like this:
/// <summary>
/// Upload pipeline processor that aborts an upload when the content hash of an
/// uploaded file (or of an entry inside an uploaded archive that is being unpacked)
/// matches media that the searcher already knows.
/// </summary>
public class CheckDuplicatesByHash : UploadProcessor
{
    /// <summary>Searcher used to look up existing media by hash; resolved through the service locator.</summary>
    protected IMediaSearcher mediaSearcher => ServiceLocator.ServiceProvider.GetService<IMediaSearcher>();

    /// <summary>Wrapper around the current HTTP context.</summary>
    protected HttpContextBase httpContextBase => (HttpContextBase)new HttpContextWrapper(HttpContext.Current);

    /// <summary>
    /// Checks every uploaded file for duplicates and aborts the pipeline on the first match.
    /// Nothing is checked when the upload destination is the file system.
    /// </summary>
    /// <param name="args">Upload pipeline arguments; must not be null.</param>
    public void Process(UploadArgs args)
    {
        Assert.ArgumentNotNull((object)args, nameof(args));
        if (args.Destination == UploadDestination.File)
        {
            return;
        }

        // A single request can carry several files.
        foreach (string key in (NameObjectCollectionBase)args.Files)
        {
            HttpPostedFile postedFile = args.Files[key];
            if (string.IsNullOrEmpty(postedFile.FileName))
            {
                continue;
            }

            bool duplicateFound = UploadProcessor.IsUnpack(args, postedFile)
                ? this.ArchiveContainsDuplicate(args, postedFile)
                : this.FileIsDuplicate(args, postedFile);

            if (duplicateFound)
            {
                return;
            }
        }
    }

    /// <summary>Checks each non-empty entry of an uploaded archive; returns true once a duplicate aborted the pipeline.</summary>
    private bool ArchiveContainsDuplicate(UploadArgs args, HttpPostedFile archive)
    {
        // NOTE(review): the reader is deliberately left undisposed, as in the original code;
        // disposing it may close the upload stream that is rewound below - confirm before changing.
        ZipReader zipReader = new ZipReader(archive.InputStream);
        try
        {
            foreach (ZipEntry entry in zipReader.Entries)
            {
                if (entry.Size == 0)
                {
                    continue;
                }

                string hash;
                // Release every entry stream as soon as it has been hashed.
                using (var entryStream = entry.GetStream())
                {
                    hash = MD5Helper.StreamToMD5(entryStream);
                }

                if (this.AbortIfDuplicate(args, archive.FileName + "/" + entry.Name, hash))
                {
                    return true;
                }
            }

            return false;
        }
        finally
        {
            // Rewind so that later processors can read the archive from the start.
            archive.InputStream.Position = 0L;
        }
    }

    /// <summary>Checks a plainly uploaded file; returns true once a duplicate aborted the pipeline.</summary>
    private bool FileIsDuplicate(UploadArgs args, HttpPostedFile file)
    {
        string hash;
        try
        {
            hash = MD5Helper.StreamToMD5(file.InputStream);
        }
        finally
        {
            // Hashing consumes the stream; rewind it (as the archive branch does)
            // so that later processors do not start reading at the end of the file.
            file.InputStream.Position = 0L;
        }

        return this.AbortIfDuplicate(args, file.FileName, hash);
    }

    /// <summary>
    /// Looks up media with the given hash; when any exists, sets the error text, writes the
    /// alert script for the UploadMedia2.aspx request path and aborts the pipeline.
    /// </summary>
    /// <returns>True when a duplicate was found and the pipeline was aborted.</returns>
    private bool AbortIfDuplicate(UploadArgs args, string uploadedName, string hash)
    {
        var found = this.mediaSearcher.GetMediaByHash(hash);
        if (found == null)
        {
            // The searcher may return null (for example, for a blank hash): nothing to report.
            return false;
        }

        var duplicates = found.ToList();
        if (!duplicates.Any())
        {
            return false;
        }

        string encodedName = HttpUtility.HtmlEncode(uploadedName);
        var duplicateItemNames = string.Join(", ", duplicates.Select(i => i.Name));
        var duplicateItemPath = string.Join(", ", duplicates.Select(i => i.Path));
        var errorText = $"The file \"{encodedName}\" is duplicate sitecore media {duplicateItemNames} ({duplicateItemPath}).";

        // The script is only written for requests whose path contains this page.
        if (this.httpContextBase.Request.Path.Contains("Upload Media/UploadMedia2.aspx"))
        {
            this.httpContextBase.Response.Write(this.ErrorMessageScript(errorText));
        }

        args.ErrorText = errorText;
        args.AbortPipeline();
        return true;
    }

    /// <summary>Builds a minimal HTML page whose script shows <paramref name="message"/> in an alert box.</summary>
    private string ErrorMessageScript(string message)
    {
        // The message contains file and item names; escape it so it cannot break out of
        // the JavaScript string literal or the surrounding script element.
        string literal = HttpUtility.JavaScriptStringEncode(message);
        return $"<html><head><script type=\"text/JavaScript\" language=\"javascript\">alert('{literal}')</script></head></html>";
    }
}
This method is not elegant, but it works with different upload types (through an archive, with a list of images, etc.).
Connecting the processor to Sitecore configuration.
<!-- Sitecore configuration patch: adds the CheckDuplicatesByHash processor to the uiUpload pipeline. -->
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/"
xmlns:env="http://www.sitecore.net/xmlconfig/env/">
<sitecore>
<processors>
<uiUpload>
<!-- patch:after positions this processor directly after the Sitecore.Kernel CheckSize processor. -->
<processor type="DTCM.Feature.DuplicateMedia.Pipelines.UploadProcessor.CheckDuplicatesByHash, DTCM.Feature.DuplicateMedia" mode="on"
patch:after="*[@type='Sitecore.Pipelines.Upload.CheckSize, Sitecore.Kernel']" />
</uiUpload>
</processors>
</sitecore>
</configuration>
Demo:
Uploading a file with the same image that I have in Sitecore.