算法入门【算法图解】 · 容与的技术博客

看的第一本算法书，图很多，讲的很详细，不过觉得讲的有点浅，像是专门给还不是程序员的人看的。

二分查找

输入一个有序的列表和一个元素，如果元素包含的列表中返回元素的位置，否则返回null。一般，对于包含n个元素的列表，二分查找需要log2n步，简单查找最多需要n步。

大O表示法，指出了算法运行时间的增速。二分查找表示为O(logn)，括号中叫做操作数。

import Foundation

// Note: If you aren’t familiar with Comparable, please check out “Generics” chapter in Swift book
func binarySearch <T: Comparable>(_ list: [T], item: T) -> Int? {
    // low and high keep track of which part of the list you'll search in.
    var low = 0
    var high = list.count - 1
    // While you haven't narrowed it down to one element ...
    while low <= high {
        //... check the middle element
        let mid = low + (high - low) / 2
        let guess = list[mid]
        // Found the item.
        if guess == item {
            return mid
        }
        // The guess was too high.
        if guess > item {
            high = mid - 1
        } else {
            low = mid + 1
        }
    }

    return nil
}

let myList = [1, 3, 5, 7, 9]
print(binarySearch(myList, item: 3) ?? "Not Found") // => 1
print(binarySearch(myList, item: -1) ?? "Not Found") // => Not Found

选择排序

数组预留位置是一种权变措施，可能浪费内存，超过这个值，需要转移。链表元素的内存可以在任何地方，数组是连续内存。链表每个元素存储了下一个元素的位置，链表的优势在插入元素方面，查找元素链表效率低。O(n)叫线性时间，O(1)叫常量时间。删除链表也是优势。数组可以随机访问，链表顺序访问。

选择排序速度不是很快，运行时间为O(n2)。

import Foundation

// Finds the smallest value in an array
func findSmallestIndex <T: Comparable> (_ arr: [T]) -> Int {
    // Stores the smallest value
    var smallest = arr[0]
    // We don't need any calculation if the array lenght is 1
    if arr.count == 1 {
        return 0
    }
    // Stores the index of the smallest value
    var smallestIndex = 0
    for i in 1...arr.count-1 {
        if arr[i] < smallest {
            smallest = arr[i]
            smallestIndex = i
        }
    }
    return smallestIndex
}

// Sort array
func selectionSort <T: Comparable> (arr: [T]) -> [T] {
    var newArr: [T] = []
    // We have to make mutableArray reference copy of original array, because Swift 3 doesn't allow to get var parameter
    var mutableArr = arr
    for _ in 0...mutableArr.count-1 {
       //Finds the smallest element in the array and adds it to the new array
        let smallestIndex = findSmallestIndex(mutableArr)
        newArr.append(mutableArr[smallestIndex])
        mutableArr.remove(at: smallestIndex)
    }
    return newArr
}

print(selectionSort(arr: [5, 3, 6, 2, 10])) // => [2, 3, 5, 6, 10]

递归

递归函数有基线条件和递归条件，基线条件指的是函数不再调用自己，避免无限循环。递归指的是调用自己。

尾递归

import Foundation

func fact(x: Int) -> Int {
    if x == 1 {
        return 1
    } else {
        return x*fact(x: x-1)
    }
}


print(fact(x: 5)) // => 120

快速排序

分而治之D&C，递归式解决方法。

欧几里得算法

涉及数组的递归基线条件通常是数组为空或只有一个元素。

Haskell，使用递归多，函数式编程。

快速排序比选择排序快，平均运行时间为O(nlogn)，也是最佳时间，每次都随机选择一个元素作为基准值就可以。

合并排序或者也叫归并排序 也是O(nlogn)。

import Foundation

//The following implementation of quick sort is little more classic than described in the book, but we have two use this one because of some “slice” feature limitation with array on Swift 3. Main concept is the same
func quicksort <T : Comparable> (_ array : [T]) -> [T] {
    if (array.count < 2) {
        // base case, arrays with 0 or 1 element are already "sorted"
        return array
    } else {
        // recursive case
        let pivot = array[0]
        // sub-array of all the elements less than the pivot
        let less = array.filter { $0 < pivot }
        // sub-array of all the elements equal to the pivot
        let equal = array.filter { $0 == pivot }
        // sub-array of all the elements greater than the pivot
        let greater = array.filter { $0 > pivot }
        return quicksort(less) + equal + quicksort(greater)
    }
}

print(quicksort([1, 5, 10, 25, 16, 1])) // => [1, 1, 5, 10, 16, 25]

散列表

散列表、散列映射、映射、字典、关联数组，不同的叫法一个概念。应用DNS解析、网页缓存。2个键映射到同一个位置，给这个位置用链表。散列函数将键均匀映射到不同位置，避免冲突。插入删除与链表一样快，查找与数组一样快。填装因子度量的是散列表中的空闲位置，填装因子变大，需要调整长度，通常将数组长度增大一倍。填装因子大于0.7就需要扩增散列表长度。

SHA函数 用作散列函数。

广度优先搜索

最短路径问题，用于图查找。运行时间为O(V+E)，V是顶点，E为边数。

队列FIFO，栈LIFO。

有向图，无向图。有依赖关系，叫拓扑排序。

import Foundation

// As I can see Swift doesn't have Queue default implementation, so we have to use custom on, Degue structure from Swift Algorithm Club
// https://github.com/raywenderlich/swift-algorithm-club/tree/master/Deque
public struct Deque<T> {
    private var array = [T]()

    public var isEmpty: Bool {
        return array.isEmpty
    }

    public var count: Int {
        return array.count
    }

    public mutating func enqueue(_ element: T) {
        array.append(element)
    }

    public mutating func enqueueFront(_ element: T) {
        array.insert(element, at: 0)
    }

    public mutating func dequeue() -> T? {
        if isEmpty {
            return nil
        } else {
            return array.removeFirst()
        }
    }

    public mutating func dequeueBack() -> T? {
        if isEmpty {
            return nil
        } else {
            return array.removeLast()
        }
    }

    public func peekFront() -> T? {
        return array.first
    }

    public func peekBack() -> T? {
        return array.last
    }
}

func persionIsSeller(name: String) -> Bool {
    return name.characters.last == "m"
}

var graph = [String : [String]]()
graph["you"] = ["alice", "bob", "claire"]
graph["bob"] = ["anuj", "peggy"]
graph["alice"] = ["peggy"]
graph["claire"] = ["thom", "jonny"]
graph["anuj"] = []
graph["peggy"] = []
graph["thom"] = []
graph["jonny"] = []

func search(name: String) -> Bool {
    var searchQueue = Deque<String>()
    //Swift Note: Our custom Deque doesn't have possibility to add new element as array so we have to add elements one by one (insted of +=graph["person"] in the book example)
    for string in graph[name]! {
        searchQueue.enqueue(string)
    }
    // This array is how you keep track of which people you've searched before.
    var searched = [String]()
    while !searchQueue.isEmpty {
        let person = searchQueue.dequeue()
        // Only search this person if you haven't already searched them
        if !searched.contains(person!) {
            if persionIsSeller(name: person!) {
                print("\(person!) is a mango seller!")
                return true
            } else {
                for string in graph[person!]! {
                    searchQueue.enqueue(string)
                }
                // Marks this person as searched
                searched.append(person!)
            }
        }
    }

    return false
}

if search(name: "you") == false {
    print("Mango seller Not Found!")
} // => thom is a mango seller!

狄克斯特拉算法

非加权图用广度优先搜索，加权图使用狄克斯特拉算法。环增加权重，不可能是最短路径。狄克斯特拉适用于有向无环图。负权边不能用狄克斯特拉，得用贝尔曼-福德算法。

import Foundation

// the graph
var graph =  [String : [String: Double]] ()
graph["start"] = [String: Double]()
graph["start"]?["a"] = 6
graph["start"]?["b"] = 2

graph["a"] = [String: Double]()
graph["a"]?["fin"] = 1

graph["b"] = [String: Double]()
graph["b"]?["a"] = 3
graph["b"]?["fin"] = 5

graph["fin"] = [String: Double]()

// the costs table
let infinity = Double.infinity
var costs = [String: Double]()
costs["a"] = 6
costs["b"] = 2
costs["fin"] = infinity

// the parents table
var parents = [String: String]()
parents["a"] = "start"
parents["b"] = "start"
parents["fin"] = nil

var processed = [String]()

func findLowestCostNode(costs: [String: Double]) -> [String: Double] {
    var lowestCost = Double.infinity
    var lowestCostNode = [String: Double]()
    // Go through each node.
    for node in costs {
        let cost = node.value
        // If it's the lowest cost so far and hasn't been processed yet...
        if (cost < lowestCost) && !processed.contains(node.key) {
            // ... set it as the new lowest-cost node.
            lowestCost = cost
            lowestCostNode = [node.key : node.value]
        }

    }
    return lowestCostNode
}

// Find the lowest-cost node that you haven't processed yet.
var node = findLowestCostNode(costs: costs)

// If you've processed all the nodes, this while loop is done.
while !node.isEmpty {
    // Swift Note: Unfortunately there are some limits for working with Dictionary inside Dictionary, so we have to use temp "nodeFirstKey" variable as workaround
    var nodeFirstKey = node.first?.key
    var cost = costs[nodeFirstKey!]
    // Go through all the neighbors of this node.
    var neighbors = graph[nodeFirstKey!]
    for n in (neighbors?.keys)! {
        var newCost = cost! + (neighbors?[n])!
        // If it's cheaper to get to this neighbor by going through this node...
        if costs[n]! > newCost {
            // ... update the cost for this node.
            costs[n] = newCost
            // This node becomes the new parent for this neighbor.
            parents[n] = nodeFirstKey
        }
    }
    // Mark the node as processed.
    processed.append(nodeFirstKey!)
    // Find the next node to process, and loop.
    node = findLowestCostNode(costs: costs)
}


print("Cost from the start to each node:")
print(costs) // -> ["b": 2.0, "fin": 6.0, "a": 5.0]

贪婪算法

每步都选择最优做法，结果与正确结果接近。集合问题的贪婪算法，O(n2)。NP完全问题，需要计算所有的解，选出最好的。NP只能求近似解，非NP能容易算出解。组合、序列、集合可能是NP完全问题，需要考虑所有情况不能分解，数量多变复杂速度变慢。

import Foundation

// You pass an array in, and it gets converted to a set.
var statesNeeded : Set = ["mt", "wa", "or", "id", "nv", "ut", "ca", "az"]

var stations = [String: Set<String>]()
stations["kone"] = ["id", "nv", "ut"]
stations["ktwo"] = ["wa", "id", "mt"]
stations["kthree"] = ["or", "nv", "ca"]
stations["kfour"] = ["nv", "ut"]
stations["kfive"] = ["ca", "az"]

var finalStations = Set<String>();

while !statesNeeded.isEmpty {
    var bestStation = String()
    var statesCovered = Set<String>()

    for station in stations {
        var covered = statesNeeded.intersection(station.value)
        if covered.count > statesCovered.count {
            bestStation = station.key
            statesCovered = covered
        }
        statesNeeded = statesNeeded.subtracting(statesCovered)
        //Swift note: We should avoid adding empty station to Set
        if !bestStation.isEmpty {
            finalStations.insert(bestStation)
        }
    }
}

print(finalStations) // -> ["kone", "kfive", "ktwo", "kthree"]

动态规划

求最优解，画网格图。依赖于子问题是离散的。应用，DNA相似性、git、diff、拼写检查等。

K最近邻算法

KNN，分类和回归，分类就是编组，回归是预测。余弦相似度，计算矢量距离。KNN用于机器学习，OCR，人脸识别，语音识别。训练数据。朴素贝叶斯分类器预测垃圾邮件概率。

其他算法

二叉查找树，O(logn),插入删除也快，不能随机访问。B树、红黑树、堆、伸展树。搜索引擎，反向索引。傅里叶变换，压缩音视频，地震预测、DNA分析。并行算法。MapReduce,分布式算法，短时间内完成海量工作。布隆过滤器是概率性数据结构，答案不一定准确，占用存储空间小。SHA，根据字符串返回散列值，比较文件，检查密码，simhash，局部敏感，检查相似程度，如论文查重。RSA加密。