准备
使用如下 Java 代码进行演示(后面的示例都将使用该代码)。该代码把用户传入的 url 直接交给 `URLConnection` 方法发起请求,因此存在 SSRF 漏洞
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
| package com.example.codeqltest.controller;
import com.example.codeqltest.util.HttpUtils; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.net.URLCodec; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import java.io.UnsupportedEncodingException;
/**
 * Demo controller mapped under "/ssrf". Every handler receives a caller-supplied {@code url}
 * string and forwards it, in a slightly different way, to a helper in {@code HttpUtils}.
 *
 * <p>"/One" passes {@code url} unchanged to {@code HttpUtils.URLConnection} and returns the
 * helper's result.
 */
@RestController @RequestMapping("/ssrf") public class SSRF { @RequestMapping(value = "/One") public String URLConnectionOne(String url) { return HttpUtils.URLConnection(url); }
/**
 * "/Two": appends the literal "#test" to {@code url} and passes the concatenated string to
 * {@code HttpUtils.URLConnection}.
 */
@RequestMapping(value = "/Two") public String URLConnectionTwo(String url) { String httpUrl = url + "#test"; return HttpUtils.URLConnection(httpUrl); }
/**
 * "/Three": delegates to {@code HttpUtils.checkURL}; this handler itself never calls
 * {@code HttpUtils.URLConnection}.
 */
@RequestMapping(value = "/Three") public String URLConnectionThree(String url) { return HttpUtils.checkURL(url); }
/**
 * "/Four": calls {@code HttpUtils.URLConnection} only when {@code HttpUtils.filterURL(url)}
 * returns true; otherwise returns the string "error".
 */
@RequestMapping(value = "/Four") public String URLConnectionFour(String url) { if (HttpUtils.filterURL(url)) { return HttpUtils.URLConnection(url); } return "error"; }
/**
 * "/Five": decodes {@code url} with commons-codec {@code URLCodec.decode(url, "UTF-8")} and
 * passes the decoded string to {@code HttpUtils.URLConnection}. If decoding throws
 * {@code UnsupportedEncodingException} or {@code DecoderException}, the stack trace is printed
 * and "error" is returned.
 */
@RequestMapping(value = "/Five") public String URLConnectionFive(String url) { URLCodec codec = new URLCodec(); try { String decodedUrl = codec.decode(url, "UTF-8"); return HttpUtils.URLConnection(decodedUrl); } catch (UnsupportedEncodingException | DecoderException e) { e.printStackTrace(); return "error"; } } }
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
| package com.example.codeqltest.util;
import java.io.BufferedReader; import java.io.InputStreamReader; import java.net.InetAddress; import java.net.URL; import java.net.URLConnection; import java.util.regex.Matcher; import java.util.regex.Pattern;
/**
 * HTTP helpers used by the SSRF demo controller.
 *
 * <p>{@link #URLConnection(String)} fetches whatever URL it is handed without validation. That is
 * intentional: it is the sink the accompanying CodeQL queries search for, so no filtering is added
 * to it here.
 */
public class HttpUtils {

    /**
     * Two alternative URL shapes: any {@code scheme://...} run found anywhere in the input, or a
     * complete {@code http://host[/path]} string. The hyphen is placed last in the final
     * character class so it is unambiguously a literal.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "[a-zA-Z]+://[^\\s]*|^http://([\\w-]+\\.)+[\\w-]+(/[\\w./?%&=-]*)?$");

    /**
     * Opens {@code url} and returns the response body with line terminators removed.
     *
     * @param url the URL to fetch; passed to {@link URL#URL(String)} as-is
     * @return the concatenated response lines, or the exception message if anything fails
     */
    public static String URLConnection(String url) {
        try {
            URL u = new URL(url);
            URLConnection urlConnection = u.openConnection();
            StringBuilder html = new StringBuilder();
            // try-with-resources so the stream is released even when readLine() throws.
            try (BufferedReader in =
                    new BufferedReader(new InputStreamReader(urlConnection.getInputStream()))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    html.append(inputLine);
                }
            }
            return html.toString();
        } catch (Exception e) {
            return e.getMessage();
        }
    }

    /**
     * Returns whether {@code url} may be requested: true unless its host resolves to
     * {@code 127.0.0.1} or cannot be resolved at all.
     *
     * <p>NOTE(review): this remains a demo-grade check (a single blocked address); it is not a
     * complete SSRF defence.
     *
     * @param url a full URL or a bare host name / IP literal
     * @return false when the host is 127.0.0.1 or resolution fails, true otherwise
     */
    public static boolean filterURL(String url) {
        try {
            // Resolve the host part only; handing a whole URL to getByName always fails.
            InetAddress address = InetAddress.getByName(hostOf(url));
            return !"127.0.0.1".equals(address.getHostAddress());
        } catch (Exception e) {
            return false;
        }
    }

    /**
     * Fetches {@code url} through {@link #URLConnection(String)} when it looks like a URL.
     *
     * @param url candidate URL
     * @return the fetched body (or exception message) when the pattern matches, else "error"
     */
    public static String checkURL(String url) {
        Matcher matcher = URL_PATTERN.matcher(url);
        if (matcher.find()) {
            return URLConnection(url);
        }
        return "error";
    }

    /** Extracts the host from a URL string; falls back to the raw input for bare hosts. */
    private static String hostOf(String url) {
        try {
            String host = new URL(url).getHost();
            return host.isEmpty() ? url : host;
        } catch (java.net.MalformedURLException e) {
            // No scheme (e.g. "example.com" or "10.0.0.1"): treat the input itself as the host.
            return url;
        }
    }
}
|
通过如下命令创建 CodeQL 数据库
1
| codeql database create /CodeQL/databases/javaCode-db --language=java --command="mvn clean install --file pom.xml -Dmaven.test.skip=true" --source-root=./JavaCodeDemo
|
局部数据流
局部数据流是指在单个方法或可调用对象内的数据流。局部数据流通常比全局数据流更快速、高效和准确
要使用数据流库需要先将其导入
1
| import semmle.code.java.dataflow.DataFlow
|
使用局部数据流的方式如下:
1
| DataFlow::localFlow(Node node1, Node node2)
|
`DataFlow` 模块中定义了类 `Node`,表示数据可以流经的任何元素;当数据可以从 `node1` 经过零个或多个局部步骤流到 `node2` 时,`localFlow(node1, node2)` 成立
下面通过局部数据流查询调用了 `URLConnection` 方法的路由:
1 2 3 4 5 6 7 8 9 10 11 12 13
| import java import semmle.code.java.dataflow.DataFlow
// Local data-flow query.
// `callable` is the method named "URLConnection" declared in
// com.example.codeqltest.util.HttpUtils, and `call` is any call to it.
// `source` is the first parameter of a method declared in
// com.example.codeqltest.controller.SSRF; `sink` is the first argument of `call`.
// A row is produced only when DataFlow::localFlow(source, sink) holds; the result shows the
// parameter node and the callable that contains the call.
from Callable callable, Call call, DataFlow::Node source, DataFlow::Node sink where callable.getDeclaringType().hasQualifiedName("com.example.codeqltest.util", "HttpUtils") and callable.getName() = "URLConnection" and call.getCallee() = callable and exists(Method m | m.getDeclaringType().hasQualifiedName("com.example.codeqltest.controller", "SSRF") and m.getParameter(0) = source.asParameter()) and call.getArgument(0) = sink.asExpr() and DataFlow::localFlow(source, sink) select source, call.getEnclosingCallable()
|
运行后可以看到只查出方法 1 和方法 4。方法 3 没有查到,是因为局部数据流只能查询单个方法内的数据流动,而方法 3 是在 `checkURL` 内部才调用 `URLConnection` 的。在方法 2 和方法 5 中,参数先经过字符串拼接或 URL 解码,得到的是一个新的值,`source` 并没有原样进入 `URLConnection` 方法,因此也没有查到,这里要用到局部污点追踪

上面的 CodeQL 查询可以进行一下简化,使用如下写法
1
| DataFlow::localFlow(DataFlow::parameterNode(source), DataFlow::exprNode(sink))
|
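这里的 `source`、`sink` 分别是 `Parameter` 和 `Expr`:`DataFlow::parameterNode(p)` 返回参数 `p` 对应的节点,相当于前面写法中的 `source.asParameter() = p`;`DataFlow::exprNode(e)` 返回表达式 `e` 对应的节点,相当于 `sink.asExpr() = e`。简化后的完整查询如下: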
1 2 3 4 5 6 7 8 9 10 11 12
| import java import semmle.code.java.dataflow.DataFlow
// Same local data-flow query, written with DataFlow::parameterNode / DataFlow::exprNode
// instead of separate DataFlow::Node variables.
// `p` is the first parameter of a method declared in com.example.codeqltest.controller.SSRF;
// `call` is a call to HttpUtils.URLConnection (com.example.codeqltest.util).
// A row is produced when the node for `p` flows locally to the node for the call's first
// argument; the result shows `p` and the callable that contains the call.
from Callable callable, Call call, Parameter p where callable.getDeclaringType().hasQualifiedName("com.example.codeqltest.util", "HttpUtils") and callable.getName() = "URLConnection" and exists(Method m | m.getDeclaringType().hasQualifiedName("com.example.codeqltest.controller", "SSRF") and m.getParameter(0) = p) and call.getCallee() = callable and DataFlow::localFlow(DataFlow::parameterNode(p), DataFlow::exprNode(call.getArgument(0))) select p, call.getEnclosingCallable()
|
局部污点追踪
污点追踪在数据流的基础上,额外包含了不保留原值的传播步骤(例如字符串拼接)。在下面的代码中,如果 `x` 是一个受污染的字符串,那么 `y` 也同样受到了污染
1 2
| String temp = x; String y = temp + ", " + temp;
|
先做如下导入
1
| import semmle.code.java.dataflow.TaintTracking
|
使用方式如下,跟局部数据流的用法大同小异
1
| TaintTracking::localTaint(DataFlow::parameterNode(source), DataFlow::exprNode(sink))
|
只需要对上面的查询做微小的改动,查询写法如下:
1 2 3 4 5 6 7 8 9 10 11 12
| import java import semmle.code.java.dataflow.TaintTracking
// Local taint-tracking query.
// `p` is the first parameter of a method declared in com.example.codeqltest.controller.SSRF;
// `call` is a call to HttpUtils.URLConnection (com.example.codeqltest.util).
// The only change from the local data-flow version is the final condition, which uses
// TaintTracking::localTaint between the node for `p` and the node for the call's first argument.
// The result shows `p` and the callable that contains the call.
from Callable callable, Call call, Parameter p where callable.getDeclaringType().hasQualifiedName("com.example.codeqltest.util", "HttpUtils") and callable.getName() = "URLConnection" and exists(Method m | m.getDeclaringType().hasQualifiedName("com.example.codeqltest.controller", "SSRF") and m.getParameter(0) = p) and call.getCallee() = callable and TaintTracking::localTaint(DataFlow::parameterNode(p), DataFlow::exprNode(call.getArgument(0))) select p, call.getEnclosingCallable()
|
查询结果如下。方法 3 没有被查出来是符合预期的,因为局部污点追踪同样只分析单个方法内部;方法 5 没查询到的原因后面会讲到

全局数据流
全局数据流跟踪整个程序的数据流,全局数据流的精确度低于局部数据流,分析需要更多的时间和内存来执行。
通过实现DataFlow::ConfigSig
签名,并应用模块DataFlow::Global<ConfigSig>
来使用全局数据流库
1 2 3 4 5 6 7 8 9 10 11 12 13
| import semmle.code.java.dataflow.DataFlow
module MyFlowConfiguration implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { ... }
predicate isSink(DataFlow::Node sink) { ... } }
module MyFlow = DataFlow::Global<MyFlowConfiguration>;
|
isSource
和isSink
这两个谓词是必须要实现的,通过这两个谓词来定义source
和sink
,还有两个谓词isBarrier
和isAdditionalFlowStep
是可选的,这两个谓词在后面的全局污点追踪中会用到
查询写法如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| import java import semmle.code.java.dataflow.DataFlow import semmle.code.java.dataflow.FlowSources
// Global data-flow configuration: defines where tracking starts (isSource) and ends (isSink).
module MyFlowConfiguration implements DataFlow::ConfigSig {
// Sources: every node that is a RemoteFlowSource (class imported from FlowSources).
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
// Sinks: any argument of a call to a method named "URLConnection" whose declaring type is
// named "HttpUtils". The closing brace at the end of this line also closes the module.
predicate isSink(DataFlow::Node sink) { exists(MethodCall ma | ma.getMethod().hasName("URLConnection") and ma.getMethod().getDeclaringType().hasName("HttpUtils") and sink.asExpr()=ma.getAnArgument() ) } }
// Instantiate the global data-flow library with the configuration above.
module MyFlow = DataFlow::Global<MyFlowConfiguration>;
// Report every source/sink pair connected by MyFlow::flow, each with its enclosing callable.
from DataFlow::Node source, DataFlow::Node sink where MyFlow::flow(source, sink) select source, source.getEnclosingCallable(), sink, sink.getEnclosingCallable()
|
查询结果如下,这次成功查到了方法 3

全局污点追踪
全局污点跟踪与全局数据流的关系就像局部污点跟踪与局部数据流的关系,只不过这里使用的模块是TaintTracking::Global<ConfigSig>
1 2 3 4 5 6 7 8 9 10 11 12 13
| import semmle.code.java.dataflow.TaintTracking
module MyFlowConfiguration implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node source) { ... }
predicate isSink(DataFlow::Node sink) { ... } }
module MyFlow = TaintTracking::Global<MyFlowConfiguration>;
|
查询写法如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| import java import semmle.code.java.dataflow.TaintTracking import semmle.code.java.dataflow.FlowSources
// Global taint-tracking configuration: defines where tracking starts (isSource) and ends (isSink).
module MyFlowConfiguration implements DataFlow::ConfigSig {
// Sources: every node that is a RemoteFlowSource (class imported from FlowSources).
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
// Sinks: any argument of a call to a method named "URLConnection" whose declaring type is
// named "HttpUtils". The closing brace at the end of this line also closes the module.
predicate isSink(DataFlow::Node sink) { exists(MethodCall ma | ma.getMethod().hasName("URLConnection") and ma.getMethod().getDeclaringType().hasName("HttpUtils") and sink.asExpr()=ma.getAnArgument() ) } }
// Instantiate the global taint-tracking library (TaintTracking::Global) with this configuration.
module MyFlow = TaintTracking::Global<MyFlowConfiguration>;
// Report every source/sink pair connected by MyFlow::flow, each with its enclosing callable.
from DataFlow::Node source, DataFlow::Node sink where MyFlow::flow(source, sink) select source, source.getEnclosingCallable(), sink, sink.getEnclosingCallable()
|
通过该查询可以看到还是无法查询到方法 5

在 CodeQL 的 issue 中有提到
在进行流分析和污点分析时,CodeQL 只分析经过用户代码的路径。对第三方方法的调用被视为黑箱,除非你添加一些额外的建模步骤。这就是isAdditional{Flow|Taint}Step
的作用。你可以通过实现这些谓词并描述数据的进出方式来建模库代码中的路径
所以说当流经过第三方库的方法时,追踪断掉了,需要通过isAdditionalFlowStep
将其连接起来
1 2 3 4 5 6 7 8
| /* Extra step: from the first argument of a call to org.apache.commons.codec.net.URLCodec's "decode" method (n1) to the call expression itself (n2). */ predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) { exists(MethodCall ma | ma.getMethod().getName() = "decode" and ma.getMethod().getDeclaringType().hasQualifiedName("org.apache.commons.codec.net", "URLCodec") and ma.getArgument(0) = n1.asExpr() and ma = n2.asExpr() ) }
|
而方法 4 在发起请求之前调用了过滤方法 `filterURL`,这里认为通过该过滤的 URL 不会造成 SSRF 漏洞,所以要将方法 4 排除在外。这就是 `isBarrier` 谓词的作用:告诉 CodeQL 数据流经过哪些节点之后这条路径是安全的,不再继续追踪
1 2 3
| /* Barrier: any node that is the first argument of a call to a method named "filterURL" (the declaring type is not checked). */ predicate isBarrier(DataFlow::Node node) { exists(MethodCall ma |ma.getMethod().getName()="filterURL" and ma.getArgument(0)=node.asExpr() ) }
|
所以最终写法如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
| import java import semmle.code.java.dataflow.TaintTracking import semmle.code.java.dataflow.FlowSources
// Final global taint-tracking configuration: sources, sinks, one extra flow step and one barrier.
module MyFlowConfiguration implements DataFlow::ConfigSig {
// Sources: every node that is a RemoteFlowSource (class imported from FlowSources).
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
// Sinks: any argument of a call to a method named "URLConnection" whose declaring type is
// named "HttpUtils".
predicate isSink(DataFlow::Node sink) { exists(MethodCall ma | ma.getMethod().hasName("URLConnection") and ma.getMethod().getDeclaringType().hasName("HttpUtils") and sink.asExpr()=ma.getAnArgument() ) }
// isAdditionalFlowStep: adds a step from the first argument of a call to
// org.apache.commons.codec.net.URLCodec's "decode" method (n1) to the call expression (n2).
// isBarrier: marks the first argument of any call to a method named "filterURL" as a barrier
// node. The final closing brace on this line closes the module.
predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) { exists(MethodCall ma | ma.getMethod().getName() = "decode" and ma.getMethod().getDeclaringType().hasQualifiedName("org.apache.commons.codec.net", "URLCodec") and ma.getArgument(0) = n1.asExpr() and ma = n2.asExpr() ) } predicate isBarrier(DataFlow::Node node) { exists(MethodCall ma |ma.getMethod().getName()="filterURL" and ma.getArgument(0)=node.asExpr() ) } }
// Instantiate the global taint-tracking library (TaintTracking::Global) with this configuration.
module MyFlow = TaintTracking::Global<MyFlowConfiguration>;
// Report every source/sink pair connected by MyFlow::flow, each with its enclosing callable.
from DataFlow::Node source, DataFlow::Node sink where MyFlow::flow(source, sink) select source, source.getEnclosingCallable(), sink, sink.getEnclosingCallable()
|
通过以上写法得到了正确的查询结果

参考链接