Detection rules › Kusto
Possible contact with a domain generated by a DGA
'Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm. The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely. The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported. NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It's important to cross-check the events against the entities involved in the incident.'
MITRE ATT&CK coverage
| Tactic | Techniques |
|---|---|
| Command & Control | T1568 Dynamic Resolution |
Rule body kusto
id: 4acd3a04-2fad-4efc-8a4b-51476594cec4
name: Possible contact with a domain generated by a DGA
description: |
'Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.
This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.
The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.
The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported.
NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It's important to cross-check the events against the entities involved in the incident.'
severity: Medium
requiredDataConnectors:
- connectorId: Zscaler
dataTypes:
- CommonSecurityLog
- connectorId: Barracuda
dataTypes:
- CommonSecurityLog
- connectorId: CEF
dataTypes:
- CommonSecurityLog
- connectorId: CheckPoint
dataTypes:
- CommonSecurityLog
- connectorId: CiscoASA
dataTypes:
- CommonSecurityLog
- connectorId: F5
dataTypes:
- CommonSecurityLog
- connectorId: Fortinet
dataTypes:
- CommonSecurityLog
- connectorId: PaloAltoNetworks
dataTypes:
- CommonSecurityLog
queryFrequency: 6h
queryPeriod: 6h
triggerOperator: gt
triggerThreshold: 0
tactics:
- CommandAndControl
relevantTechniques:
- T1568
query: |
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the alexa top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris = allDataSummarized
| join kind=leftanti
(
allDataSummarized
| mvexpand AllTriGrams
| summarize count() by tostring(AllTriGrams), DGADomain
| where count_ > 1
| distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris = nonRepeatingTris
| join kind=leftanti
(
nonRepeatingTris
| mvexpand AllTriGrams
| extend Trigram = tostring(AllTriGrams)
| distinct Trigram, DGADomain
| join kind=inner
(
triBaseline
)
on Trigram
| distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend DestinationHostName = tolower(DestinationHostName)
| project-rename Name=DestinationHostName, DataSource=DeviceVendor
| summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
entityMappings:
- entityType: IP
fieldMappings:
- identifier: Address
columnName: SourceIP
- entityType: DNS
fieldMappings:
- identifier: DomainName
columnName: Name
version: 1.0.6
kind: Scheduled
metadata:
source:
kind: Community
author:
name: Microsoft Security Research
support:
tier: Community
categories:
domains: [ "Security - Others" ]
Stages and Predicates
Parameters
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
Let binding: top1M
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
Let binding: triBaseline
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
Derived from triThreshold, top1M.
The stages below define let dataWithRareTris (the rule's main pipeline source).
Stage 1: source
CommonSecurityLog
Stage 2: where
| where TimeGenerated > ago(startTime)
Stage 3: where
| where isnotempty(DestinationHostName)
Stage 4: extend
| extend Name = tolower(DestinationHostName)
Stage 5: distinct
| distinct Name
Stage 6: where
| where Name has "."
Stage 7: where
| where Name !endswith ".home" and Name !endswith ".lan"
Stage 8: extend
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
Stage 9: where
| where strlen(DGADomain) > dgaLengthThreshold
Stage 10: where
| where DGADomain matches regex "^[A-Za-z]{0,}$"
Stage 11: extend
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)))
Stage 12: join (negated)
| join kind=leftanti
(
allDataSummarized
| mvexpand AllTriGrams
| summarize count() by tostring(AllTriGrams), DGADomain
| where count_ > 1
| distinct DGADomain
)
on DGADomain
Stage 13: join (negated)
| join kind=leftanti
(
nonRepeatingTris
| mvexpand AllTriGrams
| extend Trigram = tostring(AllTriGrams)
| distinct Trigram, DGADomain
| join kind=inner
(
triBaseline
)
on Trigram
| distinct DGADomain
)
on DGADomain
The stages below run on dataWithRareTris (the outer pipeline).
Stage 14: join
dataWithRareTris
| join kind=inner
(
CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend DestinationHostName = tolower(DestinationHostName)
| project-rename Name=DestinationHostName, DataSource=DeviceVendor
| summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
Stage 15: project
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
Stage 16: summarize
summarize by DGADomain
Stage 17: summarize
summarize by DGADomain
Stage 18: summarize
summarize
Stage 19: summarize
summarize by Name, SourceIP, DestinationIP, DataSource
Exclusions
Top-level NOT(...) conjuncts: predicates this rule actively suppresses.
| Field | Kind | Excluded values |
|---|---|---|
Name | ends_with | .home |
Name | ends_with | .lan |
DGADomain | gt | 8 |
DGADomain | regex_match | ^[A-Za-z]{0,}$ |
DestinationHostName | is_not_null | |
Name | match | . |
count_ | gt | 1 |
DGADomain | gt | 8 |
DGADomain | regex_match | ^[A-Za-z]{0,}$ |
DestinationHostName | is_not_null | |
Name | match | . |
count_ | gt | 1 |
triCount | gt | 500 |
Indicators
Each row is a field, operator, and value that the rule matches. The corpus column counts how many other rules in the catalog look for the same combination: high numbers point to widely-used, community-vetted indicators. Blank or 1 shows that the indicator is specific to this rule.
| Field | Kind | Values |
|---|---|---|
DGADomain | gt |
|
DGADomain | regex_match |
|
DestinationHostName | is_not_null | |
Name | ends_with |
|
Name | match |
|
Output fields
Fields the rule emits when it matches. Chronicle authors list these in the outcome block; they appear on the detection and $risk_score drives alerting. Sentinel / Defender XDR rules build them up through project / summarize / extend stages. Sentinel maps these into alert fields via entityMappings and customDetails; Defender XDR custom detections surface them as alert fields directly.
| Field | Source |
|---|---|
DataSource | summarize |
DestinationIP | summarize |
Name | summarize |
SourceIP | summarize |