{"id":977,"date":"2022-01-10T22:47:32","date_gmt":"2022-01-10T14:47:32","guid":{"rendered":"http:\/\/www.eait.co\/?p=977"},"modified":"2022-01-11T17:46:35","modified_gmt":"2022-01-11T09:46:35","slug":"pandas-datafram%e9%ab%98%e7%ba%a7%e6%9f%a5%e8%af%a2","status":"publish","type":"post","link":"https:\/\/notes.coremix.net\/?p=977","title":{"rendered":"pandas.datafram\u9ad8\u7ea7\u67e5\u8be2"},"content":{"rendered":"<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\n# pandas\u6587\u4ef6\u8bfb\u5199\r\nimport pandas as pd\r\n# data = pd.read_excel('path') # \u5176\u4ed6:csv,json,sql,query,html\r\n# data = pd.read_csv('data.csv', delimiter=',', encoding='utf-8')\r\n# data.to_excel('data.xlsx')\r\n# \u6570\u636e\u9009\u53d6:\r\ndata = pd.DataFrame(&#x5B;&#x5B;1, 2, 3], &#x5B;4, 5, 6], &#x5B;7, 8, 9]], index=&#x5B;'r1',\r\n'r2', 'r3'], columns=&#x5B;'c1', 'c2', 'c3'])\r\nprint('c1',data&#x5B;&#x5B;'c1']])  # \u5e26name\u7684\r\nprint('c1--',data&#x5B;'c1'])\r\nprint(data&#x5B;&#x5B;'c1', 'c3']])  # \u591a\u5217\u9009\u53d6  # \u6ce8\u610f\u53cc\u5217\u8868\u683c\u5f0f\r\n# \u6309\u884c\u9009\u53d6:\r\nprint(data&#x5B;1:2])# \u7b2c2\u884c\r\nprint(data&#x5B;1:3])# \u7b2c2-3\u884c\r\nprint(data&#x5B;-2:])# \u7b2c2-3\u884c\r\n# \u63a8\u8350:\r\nprint('*-'*66)\r\nprint(data.iloc&#x5B;2]) # \u8f6c\u7f6e\u884c\u5217(\u7b2c\u4e09\u884c\u53d8\u5217)\r\nprint(data.iloc&#x5B;2]&#x5B;'c2'])\r\nprint(data.iloc&#x5B;-1]) # \u8f6c\u7f6e\u884c\u5217\r\n\r\n#\u5934\u6570\u636e:\r\nprint(data.head(2))\r\n\r\nprint('*--'*55)\r\n# \u5b9e\u6218\u7528\u6cd5:\u9009\u884c,\u7136\u540e\u9009\u5217\r\nprint(data.iloc&#x5B;0:2]&#x5B;&#x5B;'c1', 'c3']])\r\nprint(data.iloc&#x5B;0:2]&#x5B;'c1'])\r\nprint('*'*55)\r\n# \u540c\u65f6\u9009\u53d6\u884c\u548c\u5217  # \u53ef\r\nprint(data.loc&#x5B;&#x5B;'r1', 'r2'], &#x5B;'c1', 'c3']])\r\nprint(data.loc&#x5B;&#x5B;'r2','r1'], &#x5B;'c1', 'c3']])\r\nprint(data.iloc&#x5B;0:2, &#x5B;0, 2]])  # \u591a\u4e2ai 
\u4f7f\u7528\u6570\u5b57\u505a\u7d22\u5f15 \u91cd\u8981\r\nprint(data.iloc&#x5B;:-1, &#x5B;0, 2]],'000')  # \u6309\u6570\u5b57\u9009\u53d6\u67d0\u51e0\u5217\r\nprint(data.iloc&#x5B;::-1, &#x5B;0, 2]],'000')  # \u6309\u6570\u5b57\u9009\u53d6\u67d0\u51e0\u5217(\u5e76\u5012\u5e8f,\u6b65\u8fdb-1)\r\nprint('*'*55)\r\n\r\n# \u6570\u636e\u7b5b\u9009:\r\nprint(data&#x5B;data&#x5B;'c1']&gt;1])  # \u9664\u4e86c1\u4e0d\u5927\u4e8e1 \u5269\u4f59\u6240\u6709\u7684\r\nprint(data&#x5B;(data&#x5B;'c1']&gt;1)&amp;(data&#x5B;'c2']==5)]) # \u683c\u5f0f\u7262\u8bb0\r\nprint(data&#x5B;(data&#x5B;'c1']&gt;1)|(data&#x5B;'c2']==5)]) # \u683c\u5f0f\u7262\u8bb0\r\nprint(data.columns.values.tolist())  # \u67e5\u8be2\u76f8\u5173\u7d22\u5f15\r\nprint(data.columns&#x5B;0],999)\r\nprint(data.columns,999)\r\nprint(data&#x5B;data.columns&#x5B;1]],999)\r\nprint(data._stat_axis.values.tolist())\r\n#\u67e5\u627e\u67d0\u5217\u7684\u67d0\u4e2a\u503c\r\nprint(data&#x5B;data&#x5B;'c1']==4],'44444')\r\nprint(data&#x5B;data&#x5B;'c1']==4].index.tolist(),'44444')# \u5e76\u8fd4\u56de\u884c\u53f7,\u6392\u5e8f,\u503c\r\nprint(data&#x5B;data&#x5B;'c1'].isin(&#x5B;4,7])])#\r\nprint(data&#x5B;data&#x5B;'c1'].isin(&#x5B;4,7])].index.tolist())#\r\n#\u67e5\u627e\u67d0\u884c\u7684\u67d0\u503c\r\nprint(list(data.iloc&#x5B;1]).index(5),'666')# \u8f6c\u4e3a\u5217\u8868\u67e5\u8be2\r\n\r\n# \u6570\u636e\u6392\u5e8f:\r\nprint(data.sort_values(by='c2',ascending=False))  #\u964d\u5e8f\r\nprint(data.sort_values(by='c2',ascending=True))  #\u5347\u5e8f\r\nprint(dfs.sort_values(by=&#x5B;0,2],ascending=&#x5B;False,False])) # \u591a\u5217\u6392\u5e8f\r\nprint('*'*55)\r\n\r\n# \u5220\u9664\uff1a\r\nprint(data.drop(columns='c1'))\r\nprint(data.drop(columns=&#x5B;'c1','c2']))\r\n\r\n# \u5176\u4ed6\u9ad8\u7ea7\u51fd\u6570:\r\nimport pandas as pd\r\n# \u62fc\u63a5:\r\ndf1 = pd.DataFrame({'\u516c\u53f8': &#x5B;'\u6052\u76db', '\u521b\u9510', '\u5feb\u5b66'], '\u5206\u6570': &#x5B;90, 95,\r\n85]})\r\ndf2 = pd.DataFrame({'\u516c\u53f8': 
&#x5B;'\u6052\u76db', '\u521b\u9510', '\u4eac\u897f'], '\u80a1\u4ef7': &#x5B;20,\r\n180, 30]})\r\ndf3 = pd.merge(df1,df2)\r\nprint(pd.merge(df1,df2)) #\u6839\u636e\u76f8\u540c\u5217\u540d\u8fdb\u884c\u5408\u5e76,\u5220\u9664\u591a\u4f59\u7684\u884c(inner\u4ea4\u96c6,\u5185\u8fde\u63a5)\r\nprint(pd.merge(df1,df2,on='\u516c\u53f8')) #\u540c\u540d\u4e0d\u6b62\u4e00\u4e2a\u7528on\u6307\u5b9a\r\nprint(pd.merge(df1,df2,how='outer')) #\u5916\u8fde\u63a5\u586b\u5145NaN\r\n    \u516c\u53f8    \u5206\u6570     \u80a1\u4ef7\r\n0  \u6052\u76db  90.0   20.0\r\n1  \u521b\u9510  95.0  180.0\r\n2  \u5feb\u5b66  85.0    NaN\r\n3  \u4eac\u897f   NaN   30.0\r\nprint(pd.merge(df1,df2,how='left')) #\u5de6\u8fde\u63a5\u586b\u5145NaN\r\n    \u516c\u53f8  \u5206\u6570     \u80a1\u4ef7\r\n0  \u6052\u76db  90   20.0\r\n1  \u521b\u9510  95  180.0\r\n2  \u5feb\u5b66  85    NaN\r\n# \u6309\u884c\u7d22\u5f15\u8fdb\u884c\u5408\u5e76\r\nprint(pd.merge(df1,df2,left_index=True,right_index=True)) #\u6309\u884c\u5408\u5e76\r\n# \u76f4\u63a5\u62fc\u63a5:\r\nprint(pd.concat(&#x5B;df1,df2],sort=False))  # \u9ed8\u8ba4\u7eb5\u5411\u62fc\u63a5axis=0,\u5e76\u5408\u5e76\u6dfb\u52a0\u65b0\u5b57\u6bb5\r\nprint(pd.concat(&#x5B;df1,df2],axis=1))  # \u6a2a\u5411\u62fc\u63a5\r\n# append\u62fc\u63a5:\r\nprint(df1.append({'\u516c\u53f8':'AA','\u5206\u6570':'90'},ignore_index=True))\r\n\r\n# \u5b57\u7b26\u4e32\u8f6c\u65e5\u671f\u65f6\u95f4\r\n# \u8f6c\u6362\u4e3a\u65f6\u95f4\u7c7b\u578b\r\ndf&#x5B;&quot;date&quot;] = pd.to_datetime(df&#x5B;&quot;date&quot;], format='%Y-%m-%d')\r\n# \u83b7\u53d6\u5e74\r\ndf&#x5B;&quot;year&quot;] = pd.to_datetime(df&#x5B;&quot;date&quot;]).dt.year\r\n# \u83b7\u53d6\u6708\r\ndf&#x5B;&quot;month&quot;] = pd.to_datetime(df&#x5B;&quot;date&quot;]).dt.month\r\n# \u83b7\u53d6\u65e5\r\ndf&#x5B;&quot;day&quot;] = pd.to_datetime(df&#x5B;&quot;date&quot;]).dt.day\r\n# \u83b7\u53d6\u5468\r\ndf&#x5B;&quot;week&quot;] = 
pd.to_datetime(df&#x5B;&quot;date&quot;]).dt.week\r\nprint(df)\r\nprint(df.dtypes)\r\n\r\n# \u6309\u65f6\u95f4\u7b5b\u9009\r\n    searchtime = time.strptime('20211109-01:55:33','%Y%m%d-%H:%M:%S')\r\n    print(searchtime)\r\n    ddd=pd.datetime.strptime('20211109-01:55:33','%Y%m%d-%H:%M:%S')\r\n   i&#x5B;0]=i&#x5B;0].apply(lambda x:x.strftime('%Y%m%d-%H:%M:%S'))  #\u65f6\u95f4\u8f6c\u5b57\u7b26\u4e32\u65b9\u6cd5\r\n    print(dfs&#x5B;dfs&#x5B;0]&lt;ddd])\r\n\r\n\r\n\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p># pandas\u6587\u4ef6\u8bfb\u5199 import pandas as pd # data = pd.read_excel [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3,23],"tags":[20],"class_list":["post-977","post","type-post","status-publish","format-standard","hentry","category-python","category-python_note","tag-python"],"blocksy_meta":[],"_links":{"self":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/977","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=977"}],"version-history":[{"count":2,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/977\/revisions"}],"predecessor-version":[{"id":988,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=\/wp\/v2\/posts\/977\/revisions\/988"}],"wp:attachment":[{"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=977"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_
route=%2Fwp%2Fv2%2Fcategories&post=977"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/notes.coremix.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=977"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}