data analysis & visualization

temp

postgeSQL 사용을 위한 SQL에 넣는 방법과 가져오는 방법

연결하기

In [1]:
import psycopg2
In [3]:
!conda install -c anaconda psycopg2 -y
Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/test/anaconda3/envs/jupyter

  added / updated specs:
    - psycopg2


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    krb5-1.16.4                |       h173b8e3_0         1.4 MB  anaconda
    libedit-3.1.20181209       |       hc058e9b_0         188 KB  anaconda
    libpq-11.5                 |       hd9ab2ff_2         2.5 MB  conda-forge
    psycopg2-2.8.4             |   py36h1ba5d50_0         171 KB  anaconda
    ------------------------------------------------------------
                                           Total:         4.2 MB

The following NEW packages will be INSTALLED:

  krb5               anaconda/linux-64::krb5-1.16.4-h173b8e3_0
  libedit            anaconda/linux-64::libedit-3.1.20181209-hc058e9b_0
  libpq              conda-forge/linux-64::libpq-11.5-hd9ab2ff_2
  psycopg2           anaconda/linux-64::psycopg2-2.8.4-py36h1ba5d50_0



Downloading and Extracting Packages
krb5-1.16.4          | 1.4 MB    | ##################################### | 100% 
libedit-3.1.20181209 | 188 KB    | ##################################### | 100% 
psycopg2-2.8.4       | 171 KB    | ##################################### | 100% 
libpq-11.5           | 2.5 MB    | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
In [2]:
conn = psycopg2.connect(
    "host='<도메인>' dbname='<DB명>' user='<사용자명>' password='<비번>'")
cur = conn.cursor()
In [3]:
from sqlalchemy import create_engine
e = create_engine('postgresql://<사용자명>:<비번>@<도메인>:<port>/<DB명>')

파일 DB에 밀어넣기

In [4]:
cur.execute("""SELECT table_name FROM information_schema.tables
       WHERE table_schema = 'public'""")
for table in cur.fetchall():
    print(table)
('test',)
('inventory',)
('AddressPoints',)
('test1',)
('test2',)
('fruits',)
('ptdj',)
('test3',)
('a',)
In [6]:
import pandas as pd
score = pd.DataFrame(
    {
        'date': ['2019-07-28'] * 4,
        'name': ['kim', 'lee', 'choi', 'park'],
        'age': [19, 20, 19, 20],
        'math_score': [91, 95, 92, 70],
        'pass_yn': [True, True, True, False]
    },
    columns=['date', 'name', 'age', 'math_score', 'pass_yn'])
In [7]:
score
Out[7]:
date name age math_score pass_yn
0 2019-07-28 kim 19 91 True
1 2019-07-28 lee 20 95 True
2 2019-07-28 choi 19 92 True
3 2019-07-28 park 20 70 False
In [8]:
score.to_sql(name='score', con=e)
In [9]:
cur.execute("""SELECT table_name FROM information_schema.tables
       WHERE table_schema = 'public'""")
for table in cur.fetchall():
    print(table)
('test',)
('inventory',)
('AddressPoints',)
('test1',)
('test2',)
('fruits',)
('ptdj',)
('test3',)
('a',)
('score',)

지도자료 밀어넣기

In [11]:
import pyproj
from fiona.crs import from_epsg

from sqlalchemy import *
from geoalchemy2 import Geometry
from geoalchemy2 import WKTElement
In [14]:
import geopandas as gpd
shp = gpd.read_file('./CTPRVN_201905/TL_SCCO_CTPRVN.shp', encoding='cp949')
shp
Out[14]:
CTPRVN_CD CTP_ENG_NM CTP_KOR_NM geometry
0 11 Seoul 서울특별시 POLYGON ((966987.226 1941110.946, 966987.119 1...
1 26 Busan 부산광역시 MULTIPOLYGON (((1138209.365 1673609.179, 11381...
2 27 Daegu 대구광역시 POLYGON ((1107777.593 1780522.269, 1107845.409...
3 28 Incheon 인천광역시 MULTIPOLYGON (((909743.413 1946023.117, 909736...
4 29 Gwangju 광주광역시 POLYGON ((932712.687 1696168.692, 932781.680 1...
5 30 Daejeon 대전광역시 POLYGON ((990946.725 1832389.260, 991057.692 1...
6 31 Ulsan 울산광역시 MULTIPOLYGON (((1167949.952 1710285.043, 11679...
7 36 Sejong-si 세종특별자치시 POLYGON ((971235.936 1844386.985, 971234.055 1...
8 41 Gyeonggi-do 경기도 MULTIPOLYGON (((931607.470 1894480.382, 931653...
9 42 Gangwon-do 강원도 MULTIPOLYGON (((1163759.191 1909653.101, 11637...
10 43 Chungcheongbuk-do 충청북도 POLYGON ((1042689.435 1917662.690, 1042731.339...
11 44 Chungcheongnam-do 충청남도 MULTIPOLYGON (((919207.926 1777520.288, 919117...
12 45 Jeollabuk-do 전라북도 MULTIPOLYGON (((902676.539 1717491.597, 902679...
13 46 Jellanam-do 전라남도 MULTIPOLYGON (((934759.097 1614071.335, 934759...
14 47 Gyeongsangbuk-do 경상북도 MULTIPOLYGON (((1171290.353 1782200.848, 11712...
15 48 Gyeongsangnam-do 경상남도 MULTIPOLYGON (((1053643.218 1612344.022, 10536...
16 50 Jeju-do 제주특별자치도 MULTIPOLYGON (((885004.560 1458756.461, 884996...
In [15]:
shp.plot(column='CTP_KOR_NM')
Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5ce96ab390>
In [20]:
shp1 = shp.copy()
In [21]:
shp1.to_sql('a', e, if_exists='replace', index=False)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
pandas/_libs/lib.pyx in pandas._libs.lib.infer_dtype()

TypeError: Cannot convert GeometryArray to numpy.ndarray

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<ipython-input-21-b2691c00fb27> in <module>
----> 1 shp1.to_sql('a', e, if_exists='replace', index=False)

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/core/generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
   2661             chunksize=chunksize,
   2662             dtype=dtype,
-> 2663             method=method,
   2664         )
   2665 

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/io/sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
    519         chunksize=chunksize,
    520         dtype=dtype,
--> 521         method=method,
    522     )
    523 

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
   1312             index_label=index_label,
   1313             schema=schema,
-> 1314             dtype=dtype,
   1315         )
   1316         table.create()

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/io/sql.py in __init__(self, name, pandas_sql_engine, frame, index, if_exists, prefix, index_label, schema, keys, dtype)
    620         if frame is not None:
    621             # We want to initialize based on a dataframe
--> 622             self.table = self._create_table_setup()
    623         else:
    624             # no data provided, read-only mode

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/io/sql.py in _create_table_setup(self)
    866         from sqlalchemy import Table, Column, PrimaryKeyConstraint
    867 
--> 868         column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type)
    869 
    870         columns = [

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/io/sql.py in _get_column_names_and_types(self, dtype_mapper)
    858         column_names_and_types += [
    859             (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False)
--> 860             for i in range(len(self.frame.columns))
    861         ]
    862 

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/io/sql.py in <listcomp>(.0)
    858         column_names_and_types += [
    859             (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False)
--> 860             for i in range(len(self.frame.columns))
    861         ]
    862 

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/io/sql.py in _sqlalchemy_type(self, col)
    950         # Infer type of column, while ignoring missing values.
    951         # Needed for inserting typed data containing NULLs, GH 8778.
--> 952         col_type = lib.infer_dtype(col, skipna=True)
    953 
    954         from sqlalchemy.types import (

pandas/_libs/lib.pyx in pandas._libs.lib.infer_dtype()

pandas/_libs/lib.pyx in pandas._libs.lib._try_infer_map()

AttributeError: 'GeometryDtype' object has no attribute 'base'

바로 넣으면 에러가 뜸 shapely를 활용할 예정

In [27]:
import shapely
In [25]:
test = shp1.geometry[1]
test
Out[25]:
In [49]:
shapely.wkt.dumps(test)[1:100]
Out[49]:
'ULTIPOLYGON (((1138209.3649876813869923 1673609.1794120199047029, 1138184.5415377025492489 1673597.'
In [29]:
shapely.wkt.loads(shapely.wkt.dumps(shp1.geometry[1]))
Out[29]:
In [30]:
shp1['geometry'] = shp1.apply(lambda x: shapely.wkt.dumps(x.geometry), axis=1)
In [31]:
shp1
Out[31]:
CTPRVN_CD CTP_ENG_NM CTP_KOR_NM geometry
0 11 Seoul 서울특별시 POLYGON ((966987.2258638255298138 1941110.9461...
1 26 Busan 부산광역시 MULTIPOLYGON (((1138209.3649876813869923 16736...
2 27 Daegu 대구광역시 POLYGON ((1107777.5933314983267337 1780522.268...
3 28 Incheon 인천광역시 MULTIPOLYGON (((909743.4125806831289083 194602...
4 29 Gwangju 광주광역시 POLYGON ((932712.6874476601369679 1696168.6917...
5 30 Daejeon 대전광역시 POLYGON ((990946.7248482395661995 1832389.2596...
6 31 Ulsan 울산광역시 MULTIPOLYGON (((1167949.9516389914788306 17102...
7 36 Sejong-si 세종특별자치시 POLYGON ((971235.9357597539201379 1844386.9848...
8 41 Gyeonggi-do 경기도 MULTIPOLYGON (((931607.4699318613857031 189448...
9 42 Gangwon-do 강원도 MULTIPOLYGON (((1163759.1907545514404774 19096...
10 43 Chungcheongbuk-do 충청북도 POLYGON ((1042689.4352804463123903 1917662.690...
11 44 Chungcheongnam-do 충청남도 MULTIPOLYGON (((919207.9262066939845681 177752...
12 45 Jeollabuk-do 전라북도 MULTIPOLYGON (((902676.5389720955863595 171749...
13 46 Jellanam-do 전라남도 MULTIPOLYGON (((934759.0967011926695704 161407...
14 47 Gyeongsangbuk-do 경상북도 MULTIPOLYGON (((1171290.3528821258805692 17822...
15 48 Gyeongsangnam-do 경상남도 MULTIPOLYGON (((1053643.2178877647966146 16123...
16 50 Jeju-do 제주특별자치도 MULTIPOLYGON (((885004.5599632168887183 145875...
In [32]:
type(shp1)
Out[32]:
geopandas.geodataframe.GeoDataFrame
In [33]:
shp1.to_sql('a', e, if_exists='replace', index=False)

밀어넣기가 잘 된거 같으나 ...

In [34]:
import pandas as pd
shp1 = pd.read_sql('select * from a', e)
shp1
Out[34]:
CTPRVN_CD CTP_ENG_NM CTP_KOR_NM geometry
0 11 Seoul 서울특별시 POLYGON ((966987.2258638255298138 1941110.9461...
1 26 Busan 부산광역시 MULTIPOLYGON (((1138209.3649876813869923 16736...
2 27 Daegu 대구광역시 POLYGON ((1107777.5933314983267337 1780522.268...
3 28 Incheon 인천광역시 MULTIPOLYGON (((909743.4125806831289083 194602...
4 29 Gwangju 광주광역시 POLYGON ((932712.6874476601369679 1696168.6917...
5 30 Daejeon 대전광역시 POLYGON ((990946.7248482395661995 1832389.2596...
6 31 Ulsan 울산광역시 MULTIPOLYGON (((1167949.9516389914788306 17102...
7 36 Sejong-si 세종특별자치시 POLYGON ((971235.9357597539201379 1844386.9848...
8 41 Gyeonggi-do 경기도 MULTIPOLYGON (((931607.4699318613857031 189448...
9 42 Gangwon-do 강원도 MULTIPOLYGON (((1163759.1907545514404774 19096...
10 43 Chungcheongbuk-do 충청북도 POLYGON ((1042689.4352804463123903 1917662.690...
11 44 Chungcheongnam-do 충청남도 MULTIPOLYGON (((919207.9262066939845681 177752...
12 45 Jeollabuk-do 전라북도 MULTIPOLYGON (((902676.5389720955863595 171749...
13 46 Jellanam-do 전라남도 MULTIPOLYGON (((934759.0967011926695704 161407...
14 47 Gyeongsangbuk-do 경상북도 MULTIPOLYGON (((1171290.3528821258805692 17822...
15 48 Gyeongsangnam-do 경상남도 MULTIPOLYGON (((1053643.2178877647966146 16123...
16 50 Jeju-do 제주특별자치도 MULTIPOLYGON (((885004.5599632168887183 145875...
In [35]:
type(shp1)
Out[35]:
pandas.core.frame.DataFrame

가지고 오면 자료의 타입은 pandas 므로 geopandas 로 바꾸어 줘야되며

In [36]:
import geopandas as gpd
shp1 = gpd.GeoDataFrame(shp1)
type(shp1)
Out[36]:
geopandas.geodataframe.GeoDataFrame
In [37]:
shp1.plot()
/home/test/anaconda3/envs/jupyter/lib/python3.6/site-packages/geopandas/plotting.py:351: FutureWarning:     You are passing non-geometry data to the GeoSeries constructor. Currently,
    it falls back to returning a pandas Series. But in the future, we will start
    to raise a TypeError instead.
  expl_series = geopandas.GeoSeries(geoms)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-37-1665a42b35a0> in <module>
----> 1 shp1.plot()

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/geopandas/geodataframe.py in plot(self, *args, **kwargs)
    654         from there.
    655         """
--> 656         return plot_dataframe(self, *args, **kwargs)
    657 
    658     plot.__doc__ = plot_dataframe.__doc__

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/geopandas/plotting.py in plot_dataframe(df, column, cmap, color, ax, cax, categorical, legend, scheme, k, vmin, vmax, markersize, figsize, legend_kwds, classification_kwds, missing_kwds, **style_kwds)
    545             figsize=figsize,
    546             markersize=markersize,
--> 547             **style_kwds
    548         )
    549 

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/geopandas/plotting.py in plot_series(s, cmap, color, ax, figsize, **style_kwds)
    351     expl_series = geopandas.GeoSeries(geoms)
    352 
--> 353     geom_types = expl_series.type
    354     poly_idx = np.asarray((geom_types == "Polygon") | (geom_types == "MultiPolygon"))
    355     line_idx = np.asarray(

~/anaconda3/envs/jupyter/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5272             if self._info_axis._can_hold_identifiers_and_holds_name(name):
   5273                 return self[name]
-> 5274             return object.__getattribute__(self, name)
   5275 
   5276     def __setattr__(self, name: str, value) -> None:

AttributeError: 'Series' object has no attribute 'type'

dump로 바꾼부분때문에 그림이 그려지지 않으므로 loads해주고 사용하여야 된다.

In [38]:
shp1['geometry'] = shp1.apply(lambda x: shapely.wkt.loads(x.geometry), axis=1)
In [39]:
shp1.plot()
Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5ce936fe10>
In [40]:
shp.plot(column='CTP_KOR_NM')
Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f5ce933e828>

동적인 그림 그리기

In [41]:
from fiona.crs import from_epsg
In [42]:
import folium
m = folium.Map(location=[40.730610, -73.935242],
               tiles='Stamen Toner',
               zoom_start=12,
               control_scale=True,
               prefer_canvas=True)
In [43]:
m
Out[43]:
In [ ]:
 

'python' 카테고리의 다른 글

pandas-profiling  (0) 2020.06.11
anaconda 활용법  (0) 2020.06.04
python 메일 보내기  (0) 2020.02.24
jupyter notebook 가상환경에 basemap 설치하기  (0) 2020.02.23
numpy 정리(1)  (0) 2020.02.18