In this work, we propose SigMal, a fast and precise malware detection framework based on signal processing techniques. SigMal is designed to operate with systems that process large amounts of binary samples. It has been observed that many samples received by such systems are variants of previously-seen malware, and they retain some similarity at the binary level. Previous systems used this notion of malware similarity to detect new variants of previously-seen malware. SigMal improves the state of the art by leveraging techniques borrowed from signal processing to extract noise-resistant similarity signatures from the samples. SigMal uses an efficient nearest-neighbor search technique, which is scalable to millions of samples. We evaluate SigMal on 1.2 million recent samples, both packed and unpacked, observed over a duration of three months. In addition, we use a constant dataset of known benign executables. Our results show that SigMal can classify 50% of the recent incoming samples with above 99% precision. We also show that SigMal could have detected, on average, 70 malware samples per day before any antivirus vendor detected them.
% Conference paper: SigMal (ACSAC 2013, ACM).
% NOTE(review): the citation key says 2015 but the year field and series say 2013;
% the key is kept unchanged so existing citations of it still resolve.
% Only the camel-case name in the title is brace-protected, so styles can recase the rest.
@inproceedings{Kirat2015SigMal_A,
  author    = {Kirat, Dhilung and Nataraj, Lakshmanan and Vigna, Giovanni and Manjunath, B. S.},
  title     = {{SigMal}: A Static Signal Processing Based Malware Triage},
  booktitle = {Proceedings of the 29th Annual Computer Security Applications Conference},
  series    = {ACSAC '13},
  year      = {2013},
  pages     = {89--98},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-2015-3},
  doi       = {10.1145/2523649.2523682},
  url       = {https://doi.org/10.1145/2523649.2523682},
}