0%

IP相关统计

统计IP访问量(独立ip访问数量)
awk '{print $1}' access.log | sort -n | uniq | wc -l
查看某一时间段的IP访问量(4-5点)

grep "07/Apr/2017:0[4-5]" access.log | awk '{print $1}' | sort | uniq -c| sort -nr | wc -l  

查看访问最频繁的前100个IP

awk '{print $1}' access.log | sort -n |uniq -c | sort -rn | head -n 100

查看访问100次以上的IP

awk '{print $1}' access.log | sort -n |uniq -c |awk '{if($1 >100) print $0}'|sort -rn

查询某个IP的详细访问情况,按访问频率排序

grep '127.0.0.1' access.log |awk '{print $7}'|sort |uniq -c |sort -rn |head -n 100

页面访问统计

查看访问最频的页面(TOP100)

awk '{print $7}' access.log | sort |uniq -c | sort -rn | head -n 100

查看访问最频的页面([排除php页面])(TOP100)

grep -v ".php"  access.log | awk '{print $7}' | sort |uniq -c | sort -rn | head -n 100 

查看页面访问次数超过100次的页面

cat access.log | cut -d ' ' -f 7 | sort |uniq -c | awk '{if ($1 > 100) print $0}' | less

查看最近1000条记录,访问量最高的页面

tail -1000 access.log |awk '{print $7}'|sort|uniq -c|sort -nr|less

每秒请求量统计

统计每秒的请求数,top100的时间点(精确到秒)

awk '{print $4}' access.log |cut -c 14-21|sort|uniq -c|sort -nr|head -n 100

每分钟请求量统计

统计每分钟的请求数,top100的时间点(精确到分钟)

awk '{print $4}' access.log |cut -c 14-18|sort|uniq -c|sort -nr|head -n 100

每小时请求量统计

统计每小时的请求数,top100的时间点(精确到小时)

awk '{print $4}' access.log |cut -c 14-15|sort|uniq -c|sort -nr|head -n 100

性能分析

在nginx log中最后一个字段加入$request_time

列出传输时间超过 3 秒的页面,显示前20条

cat access.log|awk '($NF > 3){print $7}'|sort -n|uniq -c|sort -nr|head -20

蜘蛛抓取统计

统计蜘蛛抓取次数

grep 'Baiduspider' access.log |wc -l

统计蜘蛛抓取404的次数

grep 'Baiduspider' access.log |grep '404' | wc -l

最近有个业务场景:
需要抓出coinmarketcap下的所有虚拟币链接里面的官网地址

coin
website

比如 bitcoin 页面里面的 Website、Website 2 两个链接

这样算起来大概有1300多个url,如果直接采用顺序抓取,那io瓶颈会十分明显

使用requests来爬取

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# coding: utf-8
from __future__ import print_function
import requests
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
import csv
import os
import time


class Spider(object):
    """Sequential crawler that collects the official website links of the
    coins listed on coinmarketcap and writes them to ``coin.csv``.

    All requests go through one shared ``requests`` session, one after another.
    """

    # Seconds to wait on a single request before giving up, so that one hung
    # connection cannot stall the whole sequential crawl.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.session = requests.session()
        self.targetUrl = None
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        }

    def getResponse(self, url=None):
        """Fetch *url* through the shared session.

        Returns the response object, or ``None`` when the request raised
        (the error is printed instead of being propagated).
        """
        try:
            self.session.headers = self.headers
            self.targetUrl = url
            return self.session.get(self.targetUrl, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
        except Exception as e:
            print(e)
            return None

    def getCoinList(self):
        """Return the parsed coin list from the quick-search JSON endpoint.

        Returns an empty list when the download failed or the body is not
        valid JSON, so callers can always iterate over the result.
        """
        url = 'https://files.coinmarketcap.com/generated/search/quick_search.json'
        resp = self.getResponse(url)
        if resp is None:
            # getResponse() already printed the error; previously this path
            # crashed with AttributeError on ``None.json()``.
            return []
        try:
            return resp.json()
        except ValueError as e:
            print(e)
            return []

    def getWebSite(self, slug):
        """Return the website links found on the page of the coin *slug*.

        The links are joined with ``' , '``; an empty string is returned when
        the page could not be fetched or parsed.
        """
        url = 'https://coinmarketcap.com/currencies/%s/' % slug
        resp = self.getResponse(url)
        if resp is None:
            return ''
        try:
            # Only parse the <ul class="list-unstyled"> fragments of the page.
            only_a_title = SoupStrainer('ul', attrs={'class': 'list-unstyled'})
            soup = BeautifulSoup(resp.content, "lxml", parse_only=only_a_title)
            links = soup.select('span[title="Website"] ~ a')
            return ' , '.join([x['href'] for x in links])
        except Exception as e:
            print(e)
            return ''

    def dumpCSV(self):
        """Crawl every coin in the list and write one CSV row per coin.

        The file ``coin.csv`` is created next to this script.
        """
        path = os.path.join(os.path.split(os.path.realpath(__file__))[0], 'coin.csv')
        results = self.getCoinList()
        # NOTE: opened without newline='' so the script keeps running on
        # Python 2 as well; on Python 3 the csv docs recommend newline=''.
        with open(path, 'w') as f:
            fieldnames = ['rank', 'name', 'symbol', 'website']
            wr = csv.DictWriter(f, fieldnames=fieldnames)
            wr.writeheader()
            for i, x in enumerate(results, 1):
                print(i, x['name'])
                websites = self.getWebSite(x['slug'])
                wr.writerow({'name': x['name'], 'symbol': x['symbol'], 'rank': x['rank'], 'website': websites})


if __name__ == '__main__':
    # Build the crawler first so that only the crawl itself is timed, then
    # print the elapsed wall-clock seconds.
    crawler = Spider()
    t0 = time.time()
    crawler.dumpCSV()
    elapsed = time.time() - t0
    print(elapsed)

执行下来,大概需要830s,也就是13分钟。
这样的速度实在太慢,改用asyncio+aiohttp,性能暴增

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# coding:utf-8
import aiohttp
import asyncio
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
import json
import csv
import os
import time
import sys

# On Linux, replace the built-in event loop with uvloop when it is installed.
if sys.platform == 'linux':
    try:
        import uvloop
    except ImportError:
        # uvloop is only a speed-up; keep the default loop when it is missing
        # instead of aborting the whole script.
        pass
    else:
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

# On Windows, use the IOCP-based proactor event loop.
if sys.platform == 'win32':
    loop = asyncio.ProactorEventLoop()
    asyncio.set_event_loop(loop)


async def getWeb(slug, sem):
    """Download the coinmarketcap page of one coin.

    Args:
        slug: URL slug of the coin, interpolated into the currencies URL.
        sem: semaphore that bounds how many downloads run at the same time.

    Returns:
        The page body as text, or ``None`` when anything raised (the error
        is printed instead of being propagated).
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'}
    url = 'https://coinmarketcap.com/currencies/{}/'.format(slug)
    try:
        # ``with await sem`` was deprecated in Python 3.7 and removed in 3.9;
        # ``async with`` is the supported way to hold the semaphore.
        async with sem:
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=headers, timeout=10) as resp:
                    return await resp.text()
    except Exception as e:
        print(e)
        return None


# Fetch the coin list first.
async def getList():
    """Download the quick-search coin list and keep its raw JSON text in the
    module-level ``result`` variable."""
    global result
    list_url = 'https://files.coinmarketcap.com/generated/search/quick_search.json'
    async with aiohttp.ClientSession() as client:
        async with client.get(list_url) as reply:
            body = await reply.text()
            result = body


async def main(slug, sem):
    """Fetch the page of one coin and record its website links.

    The links are stored comma-joined under ``link[slug]`` in the
    module-level ``link`` dict. Errors are printed, never raised.

    Args:
        slug: URL slug of the coin to look up.
        sem: semaphore passed through to ``getWeb`` to bound concurrency.
    """
    global link
    # The result set is small, so a plain dict serves as the cache; something
    # like redis would fit a larger crawl.
    # Create the dict only once: rebinding it on every call (as this function
    # used to do) throws away whatever tasks that already finished had stored.
    if 'link' not in globals():
        link = {}
    try:
        html = await getWeb(slug, sem)
        if html is None:
            # The download failed and getWeb() already reported why.
            return
        # Parse only the <ul class="list-unstyled"> fragments with BeautifulSoup.
        only_a_title = SoupStrainer('ul', attrs={'class': 'list-unstyled'})
        soup = BeautifulSoup(html, "lxml", parse_only=only_a_title)
        links = soup.select('span[title="Website"] ~ a')
        r = [x['href'] for x in links]
        link[slug] = ','.join(r)
        print(r)
    except Exception as e:
        print(e)

async def _crawl(slugs):
    """Run ``main`` for every slug concurrently, at most 200 at a time."""
    # Created inside the running loop so the semaphore belongs to that loop.
    sem = asyncio.Semaphore(200)
    # gather() accepts bare coroutines; asyncio.wait() has rejected them
    # since Python 3.11.
    await asyncio.gather(*(main(slug, sem) for slug in slugs))


start = time.time()
coins = []
try:
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # Recent Python versions no longer create a loop implicitly.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    loop.run_until_complete(getList())
    # Parse the list once and reuse it for both the crawl and the CSV.
    coins = json.loads(result)
    loop.run_until_complete(_crawl([x['slug'] for x in coins]))
except Exception as e:
    # A failed list download leaves ``coins`` empty, so only the CSV header
    # is written below instead of crashing on an undefined ``result``.
    print(e)

path = os.path.join(os.path.split(os.path.realpath(__file__))[0], 'coin.csv')

# ``main`` stores its findings in the module-level ``link`` dict; fall back to
# an empty mapping when no task ever ran.
sites = globals().get('link', {})

# Write the crawl results to the CSV file. newline='' is what the csv module
# requires on Python 3 (otherwise Windows gets blank rows); utf-8 keeps
# non-ASCII coin names writable regardless of the platform default.
with open(path, 'w', newline='', encoding='utf-8') as f:
    fieldnames = ['rank', 'name', 'symbol', 'website']
    wr = csv.DictWriter(f, fieldnames=fieldnames)
    wr.writeheader()
    for x in coins:
        print(x['rank'], x['name'])
        site = sites.get(x['slug'], '')
        wr.writerow({'name': x['name'], 'symbol': x['symbol'], 'rank': x['rank'], 'website': site})


print(time.time() - start)

执行下来只要不到1分钟,性能增加了10倍不止
带宽利用率也达到了最大化
band

asyncio已经在python3的标准库中,是未来的趋势。
官方文档

直接上代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import asyncio
import time

def now():
    """Return the current wall-clock time in seconds."""
    return time.time()


# Simulated asynchronous task.
async def do_some_work(x):
    """Sleep *x* seconds without blocking the event loop, then report it."""
    print('Waiting: ', x)
    await asyncio.sleep(x)
    return 'Done after {}s'.format(x)


def run_demo(delays=(4, 4, 3)):
    """Run one ``do_some_work`` task per entry of *delays* concurrently.

    Returns the task results in the order of *delays*.
    """
    # Use an explicit loop: creating tasks with ensure_future() before any
    # loop exists relies on implicit loop creation, which is deprecated and
    # no longer available on recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        # Start one task per delay (three by default).
        tasks = [loop.create_task(do_some_work(d)) for d in delays]
        if tasks:
            # asyncio.wait() raises ValueError on an empty collection.
            loop.run_until_complete(asyncio.wait(tasks))
        return [task.result() for task in tasks]
    finally:
        asyncio.set_event_loop(None)
        loop.close()


if __name__ == '__main__':
    start = now()

    for ret in run_demo():
        print('Task ret: ', ret)

    print('TIME: ', now() - start)

输出:

1
2
3
4
5
6
7
Waiting:  4
Waiting: 4
Waiting: 3
Task ret: Done after 4s
Task ret: Done after 4s
Task ret: Done after 3s
TIME: 4.003447771072388

三个任务,每个任务分别需要4秒,4秒,3秒,当任务遇到await关键字时,就会挂起,然后执行其他任务,总的消耗时间大致上会接近4秒(取决于最长消耗时间的任务),实际上会额外消耗一点时间用于任务切换,但是所有任务均在一个线程内,这个开销基本上可以忽略不计。

最近业务用上了google oauth2登陆,google文档写的很详细,在python flask很容易就能实现oauth2登陆.
oauth2的原理一张图就能概括:
webflow.png

需要用到的库:
The Google APIs Client Library for Python:

1
pip install --upgrade google-api-python-client

The google-auth, google-auth-oauthlib, and google-auth-httplib2 for user authorization.

1
pip install --upgrade google-auth google-auth-oauthlib google-auth-httplib2

下面贴一下关键代码(省略了很多无关的):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Login page
@app.route('/index')
def login_index():
    """Render the login page template."""
    page = render_template('login.html')
    return page

# Login
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Log the user in with the Google credentials stored in the session.

    Without stored credentials the user is redirected to the authorization
    step. Otherwise the Google userinfo endpoint is queried, the account
    e-mail is stored in ``session['user']`` and the success page is rendered.
    If the profile cannot be read, the stored credentials are dropped and
    the user is sent back to the login page.
    """
    if 'credentials' not in session:
        # No credentials yet: start the authorization flow first.
        return redirect(url_for('authorize'))

    credentials = google.oauth2.credentials.Credentials(
        **session['credentials'])

    # Authorized from here on: authed_session can call Google APIs.
    authed_session = AuthorizedSession(credentials)

    try:
        # Ask Google for the profile of the authorized user.
        response = authed_session.get('https://www.googleapis.com/userinfo/v2/me')
        user = json.loads(response.text)
        email = user['email']
    except Exception as e:
        # A revoked or expired token, a failed request or a payload without
        # an e-mail must not surface as a 500; forget the unusable
        # credentials and let the user sign in again.
        app.logger.info(e)
        session.pop('credentials', None)
        session.pop('user', None)
        return redirect(url_for('login_index'))

    # Remember the account in the session.
    session['user'] = email
    # Show the "login successful" page.
    return render_template('success.html')

# Logout
@app.route('/logout')
def logout():
    """Revoke the Google token and end the local session.

    Always returns a response: a redirect to the login page when nobody is
    logged in, otherwise the rendered login page. The success message is
    flashed only when Google confirmed the revocation.
    """
    if 'credentials' not in session:
        return redirect(url_for('login_index'))
    credentials = google.oauth2.credentials.Credentials(
        **session['credentials'])
    try:
        # Revoking means POSTing the token to Google's revoke endpoint.
        revoke = requests.post('https://accounts.google.com/o/oauth2/revoke',
                               params={'token': credentials.token},
                               headers={'content-type': 'application/x-www-form-urlencoded'},
                               timeout=10)
        if revoke.status_code == 200:
            flash('Logout successful!', 'success')
        else:
            app.logger.info('token revoke returned HTTP %s', revoke.status_code)
    except Exception as e:
        app.logger.info(e)

    # End the local session whatever the revoke call did. pop() avoids the
    # KeyError that ``del`` raised when 'user' was never set, and the view no
    # longer falls through returning None (a 500 in Flask).
    session.pop('user', None)
    session.pop('credentials', None)
    return render_template('login.html')

# Authorization
@app.route('/authorize')
def authorize():
    """Start the OAuth2 flow by redirecting the browser to Google's consent URL.

    Reads from ``app.config``:
      CLIENT_SECRETS_FILE - path of the client_secret.json credential file,
          obtained from https://console.cloud.google.com/apis/credentials
      SCOPES - the permission scopes to request
      REDIRECT_URI - the callback address
    The ``state`` value returned with the URL is kept in the session.
    """
    secrets_file, scopes, callback = [
        app.config[key]
        for key in ('CLIENT_SECRETS_FILE', 'SCOPES', 'REDIRECT_URI')
    ]
    flow = google_auth_oauthlib.flow.Flow.from_client_secrets_file(
        secrets_file, scopes=scopes)
    flow.redirect_uri = callback

    consent_url, state = flow.authorization_url(
        access_type='offline',
        include_granted_scopes='true')
    session['state'] = state
    return redirect(consent_url)

# Callback invoked after the user has granted access
@app.route('/oauth2callback')
def oauth2callback():
    """Exchange the authorization response for credentials.

    On success the credentials are stored in the session and the user is
    redirected to ``login``. On any error the stored credentials are removed,
    the error is logged and the user is redirected to the login page.
    """
    try:
        saved_state = session['state']
        secrets_file = app.config['CLIENT_SECRETS_FILE']
        scopes = app.config['SCOPES']
        callback = app.config['REDIRECT_URI']
        flow = google_auth_oauthlib.flow.Flow.from_client_secrets_file(
            secrets_file, scopes=scopes, state=saved_state)
        flow.redirect_uri = callback
        # Fetch the token using the full URL Google redirected to.
        flow.fetch_token(authorization_response=request.url)
        # Keep the credentials in the session.
        session['credentials'] = credentials_to_dict(flow.credentials)
    except Exception as e:
        session.pop('credentials', None)
        app.logger.info(e)
        return redirect(url_for('login_index'))
    else:
        return redirect(url_for('login'))


def credentials_to_dict(credentials):
    """Copy the attributes of *credentials* needed to rebuild it later into a
    plain dict (this is what gets stored in the session)."""
    wanted = ('token', 'refresh_token', 'token_uri',
              'client_id', 'client_secret', 'scopes')
    return {attr: getattr(credentials, attr) for attr in wanted}

gunicorn 是 python web app 比较好用的 WSGI 服务器,下面是一段平时快速启动的 shell script。
把代码保存成 gunicorn.sh ,添加执行权限 sudo chmod +x gunicorn.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/sh
# Quick start/stop/status/restart/reload wrapper around gunicorn.
#
# gunicorn options used below:
#   -w          number of workers; cpu cores * 2 + 1 is the recommended value
#   --reload    reload automatically when the app code changes
#   -u          user to run as
#   --log-file  where the log is written
#   -k          worker class, e.g. gevent for an async worker
#   -D          start as a daemon process
P=5000
worker=3
host="127.0.0.1"
# WSGI target. It is also the pattern used to find this app's gunicorn
# processes; it was never defined before, which broke every `grep $name`.
name="app:app"

# Start the daemonized gunicorn server.
start_app() {
    gunicorn -b "$host:$P" -w "$worker" --reload -u root --log-file /var/log/gunicorn/gunicorn.log -k gevent -D "$name"
}

# Print the ps lines of this app's gunicorn processes. The [g] bracket keeps
# the grep process itself out of the result.
app_procs() {
    ps aux | grep "[g]unicorn.*$name"
}

# Send signal $1 to every matching process; do nothing when none is running
# (kill without a pid would only print a usage error).
signal_app() {
    pids=$(app_procs | awk '{print $2}' | xargs)
    if [ -n "$pids" ]; then
        kill "$1" $pids
    fi
}

# Dispatch on the first argument only.
case "$1" in
    start)
        start_app
        ;;
    stop)
        signal_app -9
        ;;
    status)
        pids=$(app_procs)
        echo "$pids"
        ;;
    restart)
        signal_app -9
        sleep 1
        start_app
        ;;
    reload)
        signal_app -HUP
        ;;
    *)
        echo 'unknown arguments args(start|stop|status|restart|reload)'
        exit 1
        ;;
esac

gunicorn.sh放到wsgi启动程序的目录,例子中wsgi程序是app.py
接着启动只需
gunicorn.sh start
停止
gunicorn.sh stop
查看进程
gunicorn.sh status
重载
gunicorn.sh reload
重启
gunicorn.sh restart

用起来是不是有种nginx的感觉:) ,更多gunicorn相关设置可以到官网文档查看

参考:
https://github.com/shadowsocks/shadowsocks/blob/master/README.md
python版:
安装配置比较简单
Debian / Ubuntu:

apt-get install python-pip
pip install shadowsocks

参数说明:
-p 端口号
-k 密码
-m 加密方式

前台运行:

ssserver -p 443 -k password -m rc4-md5

如果要后台运行:

sudo ssserver -p 443 -k password -m rc4-md5 --user nobody -d start

如果要停止:

sudo ssserver -d stop

如果要检查日志:

sudo less /var/log/shadowsocks.log

用 -h 查看所有参数。也可以使用配置文件进行配置。

配置文件相关:

https://github.com/shadowsocks/shadowsocks/wiki/Configuration-via-Config-File

创建一个config配置文件 /etc/shadowsocks.json. Example:

{
    "server":"my_server_ip",
    "server_port":8388,
    "local_address": "127.0.0.1",
    "local_port":1080,
    "password":"mypassword",
    "timeout":300,
    "method":"aes-256-cfb",
    "fast_open": false
}

前台运行:

ssserver -c /etc/shadowsocks.json

后台运行:

ssserver -c /etc/shadowsocks.json -d start
ssserver -c /etc/shadowsocks.json -d stop

shadowsocks还有一个C语言编译版,叫shadowsocks-libev
编译完占用内存比python版小,兼容python版的运行命令和配置文件,安装配置复杂点.
参考: https://github.com/shadowsocks/shadowsocks/blob/master/README.md

服务器优化:

官方给出的优化参考: https://github.com/shadowsocks/shadowsocks/wiki/Optimizing-Shadowsocks

Linux 4.9+ 内核可以启用Google BBR 拥塞算法来加速TCP
我们用的谷歌VM服务器默认内核已经是4.10了,可以直接使用BBR

开启:

echo "net.core.default_qdisc=fq" >> /etc/sysctl.conf
echo "net.ipv4.tcp_congestion_control=bbr" >> /etc/sysctl.conf

保存并生效:

sysctl -p

参考
http://blog.csdn.net/dog250/article/details/52830576
http://blog.leanote.com/post/quincyhuang/google-bbr
https://www.zhihu.com/question/53559433/answer/135903103

在做网站SEO的时候,通常要提交sitemap文件
百度可以在百度站长后台直接提交,
Google和bing也都有自家平台的提交方法,
不过也有快捷的方法,比如:

Google:
http://www.google.com/webmasters/tools/ping?sitemap={url}
Bing:
http://www.bing.com/webmaster/ping.aspx?siteMap={url}

将{url}替换成站点sitemap的地址,然后在浏览器中访问就可以了

使用正则表达式字面量和使用 RegExp 构造函数创建的正则表达式不一样。在 ECMAScript 3 中, 正则表达式字面量始终会共享同一个RegExp实例,而使用构造函数创建的每一个新RegExp实例都是一个新实例。来看下面的例子。

1
2
3
4
5
6
7
8
9
10
11
// Demo: a regex literal versus the RegExp constructor, both with the global
// flag, each tested ten times against the same string.
var re = null,
i;
// Loop 1: regex literal. Under the ECMAScript 3 behaviour described in the
// accompanying text, the literal is one shared RegExp object, so its
// lastIndex carries over between iterations and test() alternates between
// true and false.
// NOTE(review): ES5 and later create a fresh RegExp object each time a
// literal is evaluated, so a modern engine should print true ten times;
// confirm against the target engine.
for (i=0; i < 10; i++){
re = /cat/g;
console.log( re.test("catastrophe"));
}
// Loop 2: the constructor builds a new RegExp object on every iteration, so
// each test() starts searching from index 0.
for (i=0; i < 10; i++){
re = new RegExp("cat", "g");
console.log( re.test("catastrophe"));

}

在第一个循环中,即使是循环体中指定的,但实际上只为 /cat/ 创建了一个 RegExp 实例。由于实例属性不会重置,所以在循环中再次调用 test() 方法会失败。这是因为第一次调用 test() 找到了”cat”,但第二次调用是从索引为 3 的字符(上一次匹配的末尾)开始的,所以就找不到它了。由于会测试到字符串末尾,所以下一次再调用 test()就又从开头开始了。
第二个循环使用 RegExp 构造函数在每次循环中创建正则表达式。因为每次迭代都会创建一个新的 RegExp 实例,所以每次调用 test()都会返回 true。

  • 如果采用正则对象方式,RegExp 构造函数接收的是字符串,而 \ 反斜杠在字符串中是转义字符,"\d" 会变成 "d",此时需要使用两个反斜杠,即用 "\\d" 来达到 \d 的效果;但是在字面量方式中,不是字符串,所以使用一个反斜杠即可。
  • 正则对象方式,可以接收参数,而正则字面量方式不可以。

匹配重复数字 如

1
2
3
111111
222222
333333333

可以使用

1
^(\d)\1{5}$

(\d) 匹配一位数字
\1 匹配分组1中的内容,即(\d)
{5} 匹配\1五次

匹配AABB数字,如 1122

1
^(\d)\1{1}(\d)\2{1}$

  • 原则, 以datetime为中心, 起点或中转, 转化为目标对象, 涵盖了大多数业务场景中需要的日期转换处理

步骤:

  1. 掌握几种对象及其关系
  2. 了解每类对象的基本操作方法
  3. 通过转化关系转化

涉及对象

  1. datetime
    1
    2
    3
    4
    5
    6
    >>> import datetime
    >>> now = datetime.datetime.now()
    >>> now
    datetime.datetime(2015, 1, 12, 23, 9, 12, 946118)
    >>> type(now)
    <type 'datetime.datetime'>
  1. timestamp
    1
    2
    3
    >>> import time
    >>> time.time()
    1421075455.568243
  1. time tuple
    1
    2
    3
    >>> import time
    >>> time.localtime()
    time.struct_time(tm_year=2015, tm_mon=1, tm_mday=12, tm_hour=23, tm_min=10, tm_sec=30, tm_wday=0, tm_yday=12, tm_isdst=0)
  1. string
    1
    2
    3
    >>> import datetime
    >>> datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    '2015-01-12 23:13:08'
  1. date
    1
    2
    3
    >>> import datetime
    >>> datetime.datetime.now().date()
    datetime.date(2015, 1, 12)

datetime基本操作

  1. 获取当前datetime
    1
    2
    3
    >>> import datetime
    >>> datetime.datetime.now()
    datetime.datetime(2015, 1, 12, 23, 26, 24, 475680)
  1. 获取当天date
    1
    2
    >>> datetime.date.today()
    datetime.date(2015, 1, 12)
  1. 获取明天/前N天

明天

1
2
>>> datetime.date.today() + datetime.timedelta(days=1)
datetime.date(2015, 1, 13)

三天前

1
2
3
4
>>> datetime.datetime.now()
datetime.datetime(2015, 1, 12, 23, 38, 55, 492226)
>>> datetime.datetime.now() - datetime.timedelta(days=3)
datetime.datetime(2015, 1, 9, 23, 38, 57, 59363)
  1. 获取当天开始和结束时间(00:00:00 23:59:59)
    1
    2
    3
    4
    >>> datetime.datetime.combine(datetime.date.today(), datetime.time.min)
    datetime.datetime(2015, 1, 12, 0, 0)
    >>> datetime.datetime.combine(datetime.date.today(), datetime.time.max)
    datetime.datetime(2015, 1, 12, 23, 59, 59, 999999)
  1. 获取两个datetime的时间差
    1
    2
    >>> (datetime.datetime(2015,1,13,12,0,0) - datetime.datetime.now()).total_seconds()
    44747.768075
  1. 获取本周/本月/上月最后一天

本周

1
2
3
4
5
6
>>> today = datetime.date.today()
>>> today
datetime.date(2015, 1, 12)
>>> sunday = today + datetime.timedelta(6 - today.weekday())
>>> sunday
datetime.date(2015, 1, 18)

本月

1
2
3
4
5
6
>>> import calendar
>>> today = datetime.date.today()
>>> _, last_day_num = calendar.monthrange(today.year, today.month)
>>> last_day = datetime.date(today.year, today.month, last_day_num)
>>> last_day
datetime.date(2015, 1, 31)

获取上个月的最后一天(可能跨年)

1
2
3
4
>>> import datetime
>>> today = datetime.date.today()
>>> first = datetime.date(day=1, month=today.month, year=today.year)
>>> lastMonth = first - datetime.timedelta(days=1)

关系转换

几个关系之间的转化

Datetime Object / String / timestamp / time tuple

关系转换例子

datetime <=> string
datetime -> string

1
2
3
>>> import datetime
>>> datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
'2015-01-12 23:13:08'

string -> datetime

1
2
3
>>> import datetime
>>> datetime.datetime.strptime("2014-12-31 18:20:10", "%Y-%m-%d %H:%M:%S")
datetime.datetime(2014, 12, 31, 18, 20, 10)

datetime <=> timetuple
datetime -> timetuple

1
2
3
>>> import datetime
>>> datetime.datetime.now().timetuple()
time.struct_time(tm_year=2015, tm_mon=1, tm_mday=12, tm_hour=23, tm_min=17, tm_sec=59, tm_wday=0, tm_yday=12, tm_isdst=-1)

timetuple -> datetime

timetuple => timestamp => datetime [看后面datetime<=>timestamp]

datetime <=> date
datetime -> date

1
2
3
>>> import datetime
>>> datetime.datetime.now().date()
datetime.date(2015, 1, 12)

date -> datetime

1
2
3
4
5
6
7
>>> datetime.date.today()
datetime.date(2015, 1, 12)
>>> today = datetime.date.today()
>>> datetime.datetime.combine(today, datetime.time())
datetime.datetime(2015, 1, 12, 0, 0)
>>> datetime.datetime.combine(today, datetime.time.min)
datetime.datetime(2015, 1, 12, 0, 0)

datetime <=> timestamp
datetime -> timestamp

1
2
3
4
>>> now = datetime.datetime.now()
>>> timestamp = time.mktime(now.timetuple())
>>> timestamp
1421077403.0

timestamp -> datetime

1
2
>>> datetime.datetime.fromtimestamp(1421077403.0)
datetime.datetime(2015, 1, 12, 23, 43, 23)