作者:昏鴉@知道創宇404區塊鏈安全研究團隊
時間:2020年8月18日

前言

以太坊(Ethereum)是一個開源的有智能合約功能的公共區塊鏈平臺,通過其專用加密貨幣以太幣(ETH)提供去中心化的以太坊虛擬機(EVM)來處理點對點合約。EVM(Ethereum Virtual Machine),以太坊虛擬機的簡稱,是以太坊的核心之一。智能合約的創建和執行都由EVM來完成,簡單來說,EVM是一個狀態執行的機器,輸入是solidity編譯后的二進制指令和節點的狀態數據,輸出是節點狀態的改變。

以太坊短地址攻擊,最早由Golem團隊于2017年4月提出,是由于底層EVM的設計缺陷導致的漏洞。ERC20代幣標準定義的轉賬函數如下:

function transfer(address to, uint256 value) public returns (bool success)

如果傳入的to是末端缺省的短地址,EVM會將后面字節補足地址,而最后的value值不足則用0填充,導致實際轉出的代幣數值倍增。

本文從以太坊源碼的角度分析EVM底層是如何處理執行智能合約字節碼的,并簡要分析短地址攻擊的原理。

EVM源碼分析

evm.go

EVM的源碼位于go-ethereum/core/vm/目錄下,在evm.go中定義了EVM結構體,并實現了EVM.Call、EVM.CallCode、EVM.DelegateCall、EVM.StaticCall四種方法來調用智能合約,EVM.Call實現了基本的合約調用的功能,后面三種方法與EVM.Call略有區別,但最終都調用run函數來解析執行智能合約

EVM.Call
// Call runs the contract stored at addr with input as its call data. This is
// the basic contract-invocation path: it transfers value from the caller to
// addr, creates the destination account when it is missing, and reverts the
// state to a snapshot when execution returns an error.
//
// It returns the contract output, the gas that is left over, and the error
// (if any) produced while executing.
func (evm *EVM) Call(caller ContractRef, addr common.Address, input []byte, gas uint64, value *big.Int) (ret []byte, leftOverGas uint64, err error) {
    // With recursion disabled, nested calls return immediately.
    if evm.vmConfig.NoRecursion && evm.depth > 0 {
        return nil, gas, nil
    }
    // Refuse to run beyond the configured call depth limit.
    if evm.depth > int(params.CallCreateDepth) {
        return nil, gas, ErrDepth
    }
    // The caller must be able to afford the value being sent.
    if !evm.Context.CanTransfer(evm.StateDB, caller.Address(), value) {
        return nil, gas, ErrInsufficientBalance
    }

    to := AccountRef(addr)
    snapshot := evm.StateDB.Snapshot()

    if !evm.StateDB.Exist(addr) {
        // Select the precompile set for the active rules; Istanbul takes
        // precedence over Byzantium, and Homestead is the fallback.
        precompiles := PrecompiledContractsHomestead
        switch {
        case evm.chainRules.IsIstanbul:
            precompiles = PrecompiledContractsIstanbul
        case evm.chainRules.IsByzantium:
            precompiles = PrecompiledContractsByzantium
        }
        if precompiles[addr] == nil && evm.chainRules.IsEIP158 && value.Sign() == 0 {
            // Zero-value call to a non-existing, non-precompile account:
            // there is nothing to do, but the tracer is still pinged.
            if evm.vmConfig.Debug && evm.depth == 0 {
                evm.vmConfig.Tracer.CaptureStart(caller.Address(), addr, false, input, gas, value)
                evm.vmConfig.Tracer.CaptureEnd(ret, 0, 0, nil)
            }
            return nil, gas, nil
        }
        evm.StateDB.CreateAccount(addr)
    }
    evm.Transfer(evm.StateDB, caller.Address(), to.Address(), value)

    // Build the contract object that scopes this execution and attach the
    // code stored at addr to it.
    contract := NewContract(caller, to, value, gas)
    contract.SetCallCode(&addr, evm.StateDB.GetCodeHash(addr), evm.StateDB.GetCode(addr))

    // Execution continues even when the account has no code, because the
    // target might be a precompile.
    start := time.Now()

    // In debug mode, report the start of the top-level call to the tracer and
    // defer the end event so it observes the final ret, err and gas usage.
    if evm.vmConfig.Debug && evm.depth == 0 {
        evm.vmConfig.Tracer.CaptureStart(caller.Address(), addr, false, input, gas, value)

        defer func() { // Parameters are evaluated lazily, at return time.
            evm.vmConfig.Tracer.CaptureEnd(ret, gas-contract.Gas, time.Since(start), err)
        }()
    }

    // Hand the contract over to run, which executes it.
    ret, err = run(evm, contract, input, false)
    if err == nil {
        return ret, contract.Gas, err
    }

    // On failure, roll the state back to the snapshot. Every error other than
    // errExecutionReverted additionally consumes all remaining gas.
    evm.StateDB.RevertToSnapshot(snapshot)
    if err != errExecutionReverted {
        contract.UseGas(contract.Gas)
    }
    return ret, contract.Gas, err
}
EVM.CallCode
// CallCode runs the code stored at addr with input as its call data, similar
// to callcode in Solidity. Unlike Call, the executing contract's own address
// is the caller's address (to is built from caller.Address()), so the foreign
// code runs with the caller as its context.
//
// The caller's balance is checked against value, but this function does not
// itself call Transfer. The state is reverted to a snapshot when execution
// returns an error.
//
// It returns the contract output, the gas that is left over, and the error
// (if any) produced while executing.
func (evm *EVM) CallCode(caller ContractRef, addr common.Address, input []byte, gas uint64, value *big.Int) (ret []byte, leftOverGas uint64, err error) {
    // With recursion disabled, nested calls return immediately.
    if evm.vmConfig.NoRecursion && evm.depth > 0 {
        return nil, gas, nil
    }
    // Refuse to run beyond the configured call depth limit.
    if evm.depth > int(params.CallCreateDepth) {
        return nil, gas, ErrDepth
    }
    // The caller must be able to afford the value being sent.
    if !evm.CanTransfer(evm.StateDB, caller.Address(), value) {
        return nil, gas, ErrInsufficientBalance
    }

    snapshot := evm.StateDB.Snapshot()
    to := AccountRef(caller.Address())

    // Build the contract object that scopes this execution; the code comes
    // from addr while the contract address stays the caller's.
    contract := NewContract(caller, to, value, gas)
    contract.SetCallCode(&addr, evm.StateDB.GetCodeHash(addr), evm.StateDB.GetCode(addr))

    // Hand the contract over to run, which executes it.
    ret, err = run(evm, contract, input, false)
    if err == nil {
        return ret, contract.Gas, err
    }

    // On failure, roll the state back to the snapshot. Every error other than
    // errExecutionReverted additionally consumes all remaining gas.
    evm.StateDB.RevertToSnapshot(snapshot)
    if err != errExecutionReverted {
        contract.UseGas(contract.Gas)
    }
    return ret, contract.Gas, err
}
EVM.DelegateCall
// DelegateCall runs the code stored at addr with input as its call data,
// similar to delegatecall in Solidity. As with CallCode, the executing
// contract's address is the caller's address, so the foreign code runs in the
// calling contract's context. No value is passed to NewContract, and the
// contract is additionally marked via AsDelegate (per the upstream
// description, this sets the caller to the caller of the caller).
//
// The state is reverted to a snapshot when execution returns an error.
//
// It returns the contract output, the gas that is left over, and the error
// (if any) produced while executing.
func (evm *EVM) DelegateCall(caller ContractRef, addr common.Address, input []byte, gas uint64) (ret []byte, leftOverGas uint64, err error) {
    // With recursion disabled, nested calls return immediately.
    if evm.vmConfig.NoRecursion && evm.depth > 0 {
        return nil, gas, nil
    }
    // Refuse to run beyond the configured call depth limit.
    if evm.depth > int(params.CallCreateDepth) {
        return nil, gas, ErrDepth
    }

    snapshot := evm.StateDB.Snapshot()
    to := AccountRef(caller.Address())

    // Build the delegate contract object; the code comes from addr while the
    // contract address stays the caller's.
    contract := NewContract(caller, to, nil, gas).AsDelegate()
    contract.SetCallCode(&addr, evm.StateDB.GetCodeHash(addr), evm.StateDB.GetCode(addr))

    // Hand the contract over to run, which executes it.
    ret, err = run(evm, contract, input, false)
    if err == nil {
        return ret, contract.Gas, err
    }

    // On failure, roll the state back to the snapshot. Every error other than
    // errExecutionReverted additionally consumes all remaining gas.
    evm.StateDB.RevertToSnapshot(snapshot)
    if err != errExecutionReverted {
        contract.UseGas(contract.Gas)
    }
    return ret, contract.Gas, err
}
EVM.StaticCall
// StaticCall runs the contract stored at addr with input as its call data
// while disallowing modifications to the state: opcodes that attempt such
// modifications result in exceptions instead. It resembles Call, but carries
// a zero value and invokes run with its last argument set to true, whereas
// the other call variants pass false.
//
// The state is reverted to a snapshot when execution returns an error.
//
// It returns the contract output, the gas that is left over, and the error
// (if any) produced while executing.
func (evm *EVM) StaticCall(caller ContractRef, addr common.Address, input []byte, gas uint64) (ret []byte, leftOverGas uint64, err error) {
    // With recursion disabled, nested calls return immediately.
    if evm.vmConfig.NoRecursion && evm.depth > 0 {
        return nil, gas, nil
    }
    // Refuse to run beyond the configured call depth limit.
    if evm.depth > int(params.CallCreateDepth) {
        return nil, gas, ErrDepth
    }

    to := AccountRef(addr)
    snapshot := evm.StateDB.Snapshot()

    // Build the contract object that scopes this execution, with a fresh zero
    // value, and attach the code stored at addr to it.
    contract := NewContract(caller, to, new(big.Int), gas)
    contract.SetCallCode(&addr, evm.StateDB.GetCodeHash(addr), evm.StateDB.GetCode(addr))

    // Adding a zero balance exists only to trigger a touch of the account.
    // It makes no difference on Mainnet, where all empties are gone at the
    // time of Byzantium, but it is the correct thing to do and matters on
    // other networks, in tests, and in potential future scenarios.
    evm.StateDB.AddBalance(addr, bigZero)

    // Hand the contract over to run, which executes it.
    ret, err = run(evm, contract, input, true)
    if err == nil {
        return ret, contract.Gas, err
    }

    // On failure, roll the state back to the snapshot. Every error other than
    // errExecutionReverted additionally consumes all remaining gas.
    evm.StateDB.RevertToSnapshot(snapshot)
    if err != errExecutionReverted {
        contract.UseGas(contract.Gas)
    }
    return ret, contract.Gas, err
}

run函數前半段是判斷是否是以太坊內置預編譯的特殊合約,有單獨的運行方式

后半段則是對于一般的合約調用解釋器interpreter去執行調用

interpreter.go

解釋器相關代碼在interpreter.go中,interpreter是一個接口,目前僅有EVMInterpreter這一個具體實現

合約經由EVM.Call調用Interpreter.Run來到EVMInterpreter.Run

EVMInterpreter的Run方法代碼較長,其中處理執行合約字節碼的主循環如下:

大部分代碼主要是檢查準備運行環境,執行合約字節碼的核心代碼主要是以下3行

op = contract.GetOp(pc)
operation := in.cfg.JumpTable[op]
......
res, err = operation.execute(&pc, in, contract, mem, stack)
......

interpreter的主要工作實際上只是通過JumpTable查找指令,起到一個翻譯解析的作用

最終的執行是通過調用operation對象的execute方法

jump_table.go

operation的定義位于jump_table.go

jump_table.go中還定義了JumpTable和多種不同的指令集

在基本指令集中有三個處理input的指令,分別是CALLDATALOAD、CALLDATASIZE、CALLDATACOPY

jump_table.go中的代碼同樣只是起到解析的功能,提供了指令的查找,定義了每個指令具體的執行函數

instructions.go

instructions.go中是所有指令的具體實現,上述三個函數的具體實現如下:

這三個函數的作用分別是從input加載參數入棧、獲取input大小、復制input中的參數到內存

我們重點關注opCallDataLoad函數是如何處理input中的參數入棧的

opCallDataLoad函數調用getDataBig函數,傳入contract.Input、stack.pop()和big32,將結果轉為big.Int入棧

getDataBig函數以stack.pop()棧頂元素作為起始索引,截取input中big32大小的數據,然后傳入common.RightPadBytes處理并返回

其中涉及到的另外兩個函數math.BigMin和common.RightPadBytes如下:

//file: go-ethereum/common/math/big.go

// BigMin returns the smaller of x and y. The result is one of the two
// argument pointers, not a copy; when both values are equal, x is returned.
func BigMin(x, y *big.Int) *big.Int {
    if x.Cmp(y) <= 0 {
        return x
    }
    return y
}
//file: go-ethereum/common/bytes.go

// RightPadBytes returns slice extended on the right with 0x00 bytes so that
// it is l bytes long. When slice already holds l or more bytes it is returned
// unchanged (the same slice, not a copy); otherwise a new slice is allocated.
func RightPadBytes(slice []byte, l int) []byte {
    if len(slice) >= l {
        return slice
    }

    // make zero-fills the buffer, so copying the input to the front leaves
    // the trailing bytes as 0x00 padding.
    out := make([]byte, l)
    copy(out[:len(slice)], slice)
    return out
}

分析到這里,基本上已經能很明顯看到問題所在了

RightPadBytes函數會將傳入的字節切片右填充至l字節長度,而l是被傳入的big32,即32字節長度

所以在短地址攻擊中,調用transfer(address to, uint256 value)函數時,如果to是低位缺省的地址,由于EVM在處理時固定截取32字節長度,所以會將value數值高位補的0算進to的末端,而在截取value時由于長度不夠32字節,則右填充0x00至32字節,最終導致轉賬的value成倍增大(每缺省一個字節,value就被左移8位,即放大256倍)

測試與復現

編寫一個簡單的合約來測試

pragma solidity ^0.5.0;

/// @title Minimal ERC20-style token used to reproduce the short address attack.
/// @notice transfer() performs no check on the calldata length, so this
/// contract is only suitable as a test target.
contract Test {
    // Total number of tokens minted by the constructor.
    uint256 internal _totalSupply;

    // Token balance of each account.
    mapping(address => uint256) internal _balances;

    /// @notice Emitted whenever tokens move from one account to another.
    event Transfer(address indexed from, address indexed to, uint256 value);

    /// @notice Mints the whole supply (1 * 10 ** 18 units) to the deployer.
    constructor() public {
        _totalSupply = 1 * 10 ** 18;
        _balances[msg.sender] = _totalSupply;
    }

    /// @return The total token supply.
    function totalSupply() external view returns (uint256) {
        return _totalSupply;
    }

    /// @param account Address to query.
    /// @return The token balance of account.
    function balanceOf(address account) external view returns (uint256) {
        return _balances[account];
    }

    /// @notice Moves value tokens from the sender to the address to.
    /// @dev Reverts when to is the zero address, when the sender's balance is
    /// below value, or when the recipient's balance would overflow.
    /// @param to Recipient address.
    /// @param value Amount of tokens to move.
    /// @return true when the transfer succeeded.
    function transfer(address to,uint256 value) public returns (bool) {
        require(to != address(0));
        require(_balances[msg.sender] >= value);
        require(_balances[to] + value >= _balances[to]);

        _balances[msg.sender] -= value;
        _balances[to] += value;
        emit Transfer(msg.sender, to, value);
        // Report success explicitly; without this statement the unnamed bool
        // return value stays at its default of false.
        return true;
    }
}

remix部署,調用transfer發起正常的轉賬

input:0xa9059cbb00000000000000000000000071430fd8c82cc7b991a8455fc6ea5b37a06d393f0000000000000000000000000000000000000000000000000000000000000001

直接嘗試短地址攻擊,刪去轉賬地址的后兩位,會發現并不能通過,remix會直接報錯

這是因為web3.js做了校驗,web3.js是用戶與以太坊節點交互的媒介

源碼復現

通過源碼函數復現如下:

實際復現

至于如何完成實際場景的攻擊,可以參考文末的鏈接[1],利用web3.eth.sendSignedTransaction繞過限制

實際上,web3.js做的校驗僅限于顯式傳入轉賬地址的函數,如web3.eth.sendTransaction這種,像web3.eth.sendSignedTransaction、web3.eth.sendRawTransaction這種傳入的參數是序列化后的數據的就校驗不了,是可以完成短地址攻擊的,感興趣的可以自己嘗試,這里就不多寫了

PS:文中分析的go-ethereum源碼版本是commit-fdff182,源碼與最新版有些出入,但最新版的也未修復這種缺陷(可能官方不認為這是缺陷?),分析思路依然可以沿用

思考

以太坊底層EVM并沒有修復短地址攻擊的這么一個缺陷,而是直接在web3.js里對地址做的校驗,目前各種合約或多或少也做了校驗,所以雖然EVM底層可以復現,但實際場景中問題應該不大,但如果是開放RPC的節點可能還是會存在這種風險

另外還有一個點,按底層EVM的這種機制,易受攻擊的應該不僅僅是transfer(address to, uint256 value)這個點,只是因為這個函數是ERC20代幣標準,而且參數的設計恰好能導致涉及金額的短地址攻擊,并且特殊的地址易構造,所以這個函數常作為短地址攻擊的典型。在其他的一些非代幣合約,如競猜、游戲類的合約中,一些非轉賬類的事務處理函數中,如果不對類似地址這種的參數做長度校驗,可能也存在類似短地址攻擊的風險,也或者并不局限于地址,可能還有其他的利用方式還沒挖掘出來。

參考

[1] 以太坊短地址攻擊詳解

https://www.anquanke.com/post/id/159453

[2] 以太坊源碼解析:evm

https://www.jianshu.com/p/f319c78e9714


Paper 本文由 Seebug Paper 發布,如需轉載請注明來源。本文地址:http://www.bjnorthway.com/1296/