YARA Rules

Detection

YARA is the pattern matching swiss army knife for malware researchers. Learn to write effective detection rules that identify malware families, threat actor tools, and suspicious files.

Why YARA?

YARA rules are used by antivirus engines, SIEM platforms, sandboxes, and incident responders worldwide. A single well-crafted rule can detect entire malware families across thousands of samples.

YARA Rule Structure

Every YARA rule has three main sections: metadata, strings, and a condition.

text
// Illustrative rule showing every string type (text, wide, hex, regex) and a
// condition that combines string matches with a file-size limit.
rule ExampleMalware {
    meta:
        author = "Security Analyst"
        description = "Detects Example Malware family"
        date = "2024-01-15"
        hash = "abc123..."
        reference = "https://example.com/analysis"
        
    strings:
        $mz = "MZ"                              // PE header
        $str1 = "malicious_function" ascii      // ASCII string
        $str2 = "C:\\Windows\\Temp" wide    // Wide string (Unicode)
        $hex = { 4D 5A 90 00 03 00 00 00 }     // Hex pattern
        $regex = /[a-z]{5}\.(exe|dll)/i       // Regex pattern
        
    condition:
        // Every string declared above has to be referenced here: YARA refuses
        // to compile a rule that leaves a string unreferenced.
        $mz at 0 and                           // MZ at file start
        2 of ($str*) and                       // Both $str1 and $str2
        ($hex at 0 or $regex) and              // Header bytes at offset 0, or an exe/dll name
        filesize < 1MB                         // Size limit
}

String Types

Text Strings

text
strings:
    // Plain text string; with no modifier it is matched as case-sensitive ASCII
    $s1 = "CreateRemoteThread"
    
    // nocase: match regardless of letter case
    $s2 = "kernel32.dll" nocase
    
    // wide: match the two-bytes-per-character encoding (each character followed by a zero byte)
    $s3 = "password" wide
    
    // ascii wide: match either the ASCII or the wide form
    $s4 = "admin" ascii wide
    
    // fullword: match only when delimited by non-alphanumeric characters
    $s5 = "cmd" fullword

Hex Patterns

text
strings:
    // Exact byte sequence
    $hex1 = { 4D 5A 90 00 }
    
    // ?? is a wildcard for one whole byte
    $hex2 = { 4D 5A ?? 00 }
    
    // A single ? is a wildcard for one nibble (half a byte)
    $hex3 = { 4D 5? 90 ?0 }
    
    // Jump: [2-4] stands for any 2 to 4 bytes
    $hex4 = { 4D 5A [2-4] 00 }
    
    // Alternatives: either 5A or 5B at this position
    $hex5 = { 4D ( 5A | 5B ) 90 }

Regular Expressions

text
strings:
    // Basic regex
    $r1 = /https?:\/\/[a-z0-9]+\.[a-z]{2,}/
    
    // Case insensitive ("i" flag)
    $r2 = /[a-z]+\.exe/i
    
    // Single-line mode ("s" flag): the dot also matches line breaks.
    // YARA accepts only the "i" and "s" flags; there is no "m" (multiline)
    // flag, so a pattern written as /.../m does not compile.
    $r3 = /begin.{0,64}end/s
    
    // Common C2 patterns
    $c2_ip = /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/
    $c2_url = /https?:\/\/[^\s\"'<>]+/
    $c2_domain = /[a-z0-9][-a-z0-9]*\.(com|net|org|io)/i

Condition Logic

text
condition:
    // Boolean operators
    $s1 and $s2
    $s1 or $s2
    not $s1
    
    // Count matches
    #s1 > 5                    // More than 5 matches
    #s1 == 3                   // Exactly 3 matches
    
    // String sets
    all of them              // All strings must match
    any of them              // Any string matches
    2 of them                // At least 2 strings
    3 of ($a*)               // 3 strings starting with $a
    
    // Position-based
    $mz at 0                 // At offset 0
    $s1 in (0..100)         // Match starts at an offset from 0 to 100, inclusive
    
    // File attributes
    filesize < 500KB
    filesize > 1MB and filesize < 10MB
    
    // Combining conditions
    uint16(0) == 0x5A4D and    // MZ header check
    uint32(uint32(0x3C)) == 0x00004550 and  // PE signature
    filesize < 2MB

Practical Examples

Detect Mimikatz

text
// Matches files that begin with the MZ magic and contain either several
// Mimikatz command strings or one of two name markers.
rule Mimikatz_Detection {
    meta:
        description = "Detects Mimikatz credential dumper"
        author = "Security Team"
        severity = "critical"
        
    strings:
        // Command names in module::command form, matched in ASCII and wide
        // encodings without regard to case
        $s1 = "sekurlsa::logonpasswords" ascii wide nocase
        $s2 = "sekurlsa::wdigest" ascii wide nocase
        $s3 = "lsadump::sam" ascii wide nocase
        $s4 = "lsadump::dcsync" ascii wide nocase
        $s5 = "kerberos::golden" ascii wide nocase
        $s6 = "privilege::debug" ascii wide nocase
        
        // Binary signatures (exact lowercase ASCII bytes; no wide or nocase variant)
        $hex1 = { 6D 69 6D 69 6B 61 74 7A }  // "mimikatz"
        $hex2 = { 67 65 6E 74 69 6C 6B 69 77 69 }  // "gentilkiwi"
        
    condition:
        // 0x5A4D is "MZ" read as a little-endian 16-bit value at offset 0
        uint16(0) == 0x5A4D and
        // At least three command strings, or a single name marker
        (3 of ($s*) or any of ($hex*))
}

Detect Cobalt Strike Beacon

text
// Matches MZ-prefixed files that contain both config markers, at least three
// of the listed strings, or the sleep-mask byte sequence.
rule CobaltStrike_Beacon {
    meta:
        description = "Detects Cobalt Strike Beacon payload"
        author = "Security Team"
        
    strings:
        // Config markers
        // NOTE(review): these are short, low-entropy byte runs — confirm they are selective enough on your corpus
        $cfg1 = { 00 01 00 01 00 02 }
        $cfg2 = { 00 02 00 01 00 02 }
        
        // Common strings
        $s1 = "%s (admin)" ascii wide
        $s2 = "beacon.dll" ascii
        $s3 = "ReflectiveLoader" ascii
        $s4 = "%02d/%02d/%02d %02d:%02d:%02d" ascii
        
        // Sleep mask
        $sleep = { 4C 8B DC 49 89 5B 08 49 89 6B 10 49 89 73 18 }
        
    condition:
        // "MZ" magic at offset 0
        uint16(0) == 0x5A4D and
        // Any one of the three indicator groups is sufficient
        (all of ($cfg*) or 3 of ($s*) or $sleep)
}

Detect Webshells

text
// Generic PHP webshell heuristic: fires on a known webshell name, or on a
// combination of dangerous function calls and obfuscation/input markers.
rule WebShell_PHP_Generic {
    meta:
        description = "Detects common PHP webshell patterns"
        filetype = "php"
        
    strings:
        // Dangerous functions
        // Note: "exec(" is a substring of "shell_exec(", so one shell_exec(
        // call satisfies both $f4 and $f6 and counts as two of ($f*).
        $f1 = "eval(" ascii nocase
        $f2 = "base64_decode(" ascii nocase
        $f3 = "system(" ascii nocase
        $f4 = "shell_exec(" ascii nocase
        $f5 = "passthru(" ascii nocase
        $f6 = "exec(" ascii nocase
        $f7 = "popen(" ascii nocase
        $f8 = "proc_open(" ascii nocase
        
        // Obfuscation patterns
        $o1 = "chr(" ascii nocase
        $o2 = "gzinflate(" ascii nocase
        $o3 = "str_rot13(" ascii nocase
        // "$" needs no escaping inside a YARA text string; "\$" is rejected
        // by the compiler as an illegal escape sequence.
        $o4 = "$_GET" ascii
        $o5 = "$_POST" ascii
        $o6 = "$_REQUEST" ascii
        
        // Known webshell strings
        $ws1 = "c99shell" ascii nocase
        $ws2 = "r57shell" ascii nocase
        $ws3 = "WSO " ascii
        $ws4 = "FilesMan" ascii
        
    condition:
        // Two dangerous calls plus two obfuscation/input markers, or any
        // single known webshell name
        (2 of ($f*) and 2 of ($o*)) or
        any of ($ws*)
}

YARA Modules

YARA modules extend functionality for specific file types and analysis:

PE Module

text
import "pe"

// Matches when any one of three PE traits is present; the clauses are joined
// with "or", so each is sufficient on its own.
rule PE_Packed {
    condition:
        pe.number_of_sections < 3 or
        pe.entry_point_raw == 0 or
        // NOTE(review): VirtualAlloc is imported by a lot of legitimate
        // software; as a standalone alternative this clause will likely match
        // many benign files — consider requiring it together with another indicator.
        pe.imports("kernel32.dll", "VirtualAlloc")
}

Hash Module

text
import "hash"

// Matches a file whose whole-content digest equals a known value.
rule Known_Malware_Hash {
    condition:
        // (0, filesize) hashes the file from offset 0 for its full length
        hash.md5(0, filesize) == 
          "44d88612fea8a8f36de82e1278abb02f" or
        hash.sha256(0, filesize) ==
          // Placeholder: substitute the full 64-character lowercase hex
          // digest; the truncated value below can never compare equal.
          "275a021bbfb64..."
}

Math Module

text
// The condition iterates pe.sections, so the "pe" module must be imported in
// addition to "math"; without it the compiler reports "pe" as undefined.
import "pe"
import "math"

// Matches PE files with at least one section whose raw data has very high
// entropy, as is typical of compressed or encrypted content.
rule High_Entropy_Section {
    condition:
        // Iterating a module array with "for any ... in" requires YARA 4.0 or later
        for any section in pe.sections : (
            // Entropy is measured in bits per byte (0 to 8) over the section's bytes on disk
            math.entropy(section.raw_data_offset,
              section.raw_data_size) > 7.5
        )
}

ELF Module

text
import "elf"

// Matches ELF files of type ET_EXEC whose reported entry point is below 0x8000000.
rule ELF_Suspicious {
    condition:
        elf.type == elf.ET_EXEC and
        // NOTE(review): the elf module documents entry_point as a raw file
        // offset when scanning files (a virtual address only when scanning
        // process memory), so this comparison holds for most on-disk
        // executables — confirm the intended threshold.
        elf.entry_point < 0x8000000
}

Running YARA

bash
# Scan single file
yara rules.yar suspicious.exe

# Scan directory recursively
yara -r rules.yar /path/to/samples/

# Multiple rule files
yara rule1.yar rule2.yar sample.exe

# Scan process memory: pass the PID as the target
# (-p sets the number of scanning threads, it does not select a process)
yara rules.yar 1234

# Output matching strings
yara -s rules.yar sample.exe

# Output metadata
yara -m rules.yar sample.exe

# Compile rules for faster scanning
yarac rules.yar compiled.yarc
yara -C compiled.yarc samples/

# Abort scanning after 30 seconds
# (-a / --timeout is the timeout option; -t filters output by rule tag)
yara -a 30 rules.yar samples/

Best Practices

✅ Do

  • • Use descriptive rule and string names
  • • Include comprehensive metadata (author, date, references)
  • • Test against clean files to avoid false positives
  • • Use filesize limits to improve performance
  • • Combine multiple indicators for accuracy
  • • Version control your rules

❌ Don't

  • • Use overly broad patterns (e.g., just "http://")
  • • Rely on single indicators
  • • Forget to test on legitimate software
  • • Use expensive regex without constraints
  • • Ignore performance impact on large scans

YARA Resources