ロードマップ
user
daemon
worker process
pid
worker connections
multi accept
server tokens
keepalive timeout
http_x_forwarded_for
healthcheck
location /
import pandas as pd
df = pd.read_csv(INPUT_DATA_PATH_DIR + 'train.csv')
df.columns
import pandas_profiling as pdp
pdp.ProfileReport(df)
df_gb_label_group = pd.DataFrame({"count": df.groupby("label_group").size()})
posting_id = "AAABBBCCC"
image = df.query('posting_id == @posting_id')['image'].iloc[-1]
for index, raw in df.iterrows():
    sentence = raw["title"]
    print(sentence)
# faissインデックス作成
dimension = len(feature_texts[0])
nlist = min(100, len(feature_texts))
quantiser = faiss.IndexFlatL2(dimension)
faiss_index = faiss.IndexIVFFlat(quantiser, dimension, nlist, faiss.METRIC_L2)
# faissインデックスの学習・追加
faiss_index.train(feature_texts)
faiss_index.add(feature_texts)
# 近傍探索
faiss_index.nprobe = 10
s = time.time()
distance_similar_texts, idx_similar_texts = faiss_index.search(feature_texts, 3)
e = time.time()
print("search time: {}".format(e-s))
tf-idf
# from cuml.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
model = TfidfVectorizer(stop_words = 'english', binary = True, max_features = FEATURE_TEXTS_DIM)
feature_texts = model.fit_transform(df['title']).toarray()
resnet
OCR tesseract
import pytesseract
# バッチ実行
result = pytesseract.image_to_string('./image_path.csv', lang="eng", config='--psm 3')
import numpy as np
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
%matplotlib inline
#画像の読み込み
im = Image.open(INPUT_DATA_PATH_DIR + "train_images/" + image, 'r')
#画像をarrayに変換
im_list = np.asarray(im)
#貼り付け
plt.imshow(im_list)
#表示
print(image)
plt.show()
from cuml import PCA
from cuml.neighbors import NearestNeighbors
kaggle
pip install kaggle を追記してコンテナを再作成する
# ホスト側で実行(bashrcなどに設定しておく)
$ export KAGGLE_USERNAME=kaggle_username
$ export KAGGLE_KEY=xxxxxxxxxxxxxx
...
  -e KAGGLE_USERNAME=${KAGGLE_USERNAME} \
  -e KAGGLE_KEY=${KAGGLE_KEY} \
...
$ kaggle competitions list
Input/Outputデータ
# mainブランチを最新にする
$ git checkout main
$ git status
$ git pull origin main
# → 上記同様、新たなブランチを作成する
1. Requirements
2. Conf
3. Utils
4. Models
5. Submit
6. Evaluate