antofra10 committed
Commit b72dbd8 · Parent: 225af6a

Port changes from Milestone-5

.gitignore CHANGED
@@ -188,5 +188,3 @@ cython_debug/
  # PyPI configuration file
  .pypirc
  .github/copilot-instructions.md
-
- docs/img/
 
Dockerfile CHANGED
@@ -10,6 +10,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
  # Install system dependencies
  RUN apt-get update && apt-get install -y \
      git \
+     dos2unix \
      && rm -rf /var/lib/apt/lists/*

  # Create a non-root user
@@ -21,12 +22,24 @@ WORKDIR /app
  # Copy requirements first for caching
  COPY requirements.txt .

- # Install dependencies
- RUN pip install --no-cache-dir -r requirements.txt
+ # Remove -e . from requirements.txt to avoid installing the project before copying it
+ # and install dependencies
+ RUN sed -i '/-e \./d' requirements.txt && \
+     pip install --no-cache-dir -r requirements.txt

  # Copy the rest of the application
  COPY --chown=user:user . .

+ # Ensure the user has permissions on the app directory (needed for dvc init if .dvc is missing)
+ RUN chown -R user:user /app
+
+ # Fix line endings and permissions for the start script
+ RUN dos2unix scripts/start_space.sh && \
+     chmod +x scripts/start_space.sh
+
+ # Install the project itself
+ RUN pip install --no-cache-dir .
+
  # Make start script executable
  RUN chmod +x scripts/start_space.sh
 
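Not part of the commit, but a quick way to sanity-check the two-step install locally, assuming Docker is available. The image tag hopcroft-space is a placeholder, 7860 is the Streamlit port used by scripts/start_space.sh, and the DAGSHUB_*/MLFLOW_* credential variables the start script reads still have to be provided at run time:

    # Preview the filtered requirements: the same sed expression the Dockerfile
    # runs, dropping the editable-install line ("-e .") before pip install.
    sed '/-e \./d' requirements.txt

    # Build and run the image locally (placeholder tag).
    docker build -t hopcroft-space .
    docker run --rm -p 7860:7860 hopcroft-space
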
docs/img/gui_detailed.png ADDED

Git LFS Details

  • SHA256: d1f9d9787a5c2cf6bf043e260ee90c6d07cc5a28ae08883a687330ade1076633
  • Pointer size: 131 Bytes
  • Size of remote file: 174 kB
docs/img/gui_detailed_input.png ADDED

Git LFS Details

  • SHA256: fd9f310ba2fe5c7c33a1eec8e68b383ca699d22ec040308383dd6a6b6ff129ae
  • Pointer size: 130 Bytes
  • Size of remote file: 80.9 kB
docs/img/gui_ex.png ADDED

Git LFS Details

  • SHA256: 6e5ff8ff2b58d6b829700e343e8c07b0574739f96c579322e61124944ac7e1eb
  • Pointer size: 130 Bytes
  • Size of remote file: 32.7 kB
docs/img/gui_main_dashboard.png ADDED

Git LFS Details

  • SHA256: cee606d345c48de3694ffea8f5ff76ac54892b1597d04839b1d117235b1721be
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
docs/img/gui_quick_input.png ADDED

Git LFS Details

  • SHA256: f9a64c12a48ad5ab3a1fa9bf463f241cec5888b6cc7cadbd9f3df2e73c17e811
  • Pointer size: 130 Bytes
  • Size of remote file: 91.8 kB
hopcroft_skill_classification_tool_competition/config.py CHANGED
@@ -30,10 +30,10 @@ EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
  # API Configuration - which model to use for predictions
  API_CONFIG = {
      # Model file to load (without path, just filename)
-     "model_name": "random_forest_embedding_gridsearch.pkl",
+     "model_name": "random_forest_tfidf_gridsearch.pkl",
      # Feature type: "tfidf" or "embedding"
      # This determines how text is transformed before prediction
-     "feature_type": "embedding",
+     "feature_type": "tfidf",
  }

  # Training configuration
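With API_CONFIG switched to the TF-IDF model, the API now depends on exactly the artifacts that scripts/start_space.sh pulls. A minimal check after dvc pull, assuming the files land under models/ as the script implies:

    # Confirm the artifacts the updated config expects are actually present.
    ls -lh models/random_forest_tfidf_gridsearch.pkl \
           models/tfidf_vectorizer.pkl \
           models/label_names.pkl
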
hopcroft_skill_classification_tool_competition/main.py CHANGED
@@ -194,6 +194,8 @@ async def predict_skills(issue: IssueInput) -> PredictionRecord:
          )

      except Exception as e:
+         import traceback
+         traceback.print_exc()
          raise HTTPException(
              status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
              detail=f"Prediction failed: {str(e)}",
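traceback.print_exc() writes to stderr, so failed predictions now leave a full stack trace in the uvicorn/container logs rather than only the generic 500 detail. A sketch for reproducing this locally; the module path is taken from the start script and port 8000 mirrors it:

    # Run the API on its own and watch stderr: any exception raised while
    # handling a prediction request now prints a full traceback before the
    # 500 response is returned.
    uvicorn hopcroft_skill_classification_tool_competition.main:app --port 8000
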
requirements.txt CHANGED
@@ -13,6 +13,7 @@ seaborn

  # Data versioning
  dvc
+ dvc-s3
  mlflow==2.16.0
  protobuf==4.25.3

@@ -25,6 +26,7 @@ fastapi[standard]>=0.115.0
  pydantic>=2.0.0
  uvicorn>=0.30.0
  httpx>=0.27.0
+ streamlit>=1.28.0

  # Development tools
  ipython
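dvc-s3 adds an S3-capable backend for DVC remotes and streamlit provides the GUI launched by the start script. A hedged sketch for checking that the new pins resolve together in a throwaway environment (.venv is a placeholder name):

    # Fresh environment, install, then import the two new runtime dependencies.
    python -m venv .venv && . .venv/bin/activate
    pip install -r requirements.txt
    python -c "import dvc, streamlit; print(streamlit.__version__)"
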
scripts/start_space.sh CHANGED
@@ -3,6 +3,13 @@
  # Fail on error
  set -e

+ # Ensure DVC is initialized (in case .dvc folder was not copied)
+ if [ ! -d ".dvc" ]; then
+     echo "Initializing DVC..."
+     dvc init --no-scm
+     dvc remote add -d origin https://dagshub.com/se4ai2526-uniba/Hopcroft.dvc
+ fi
+
  # Determine credentials
  # Prefer specific DAGSHUB vars, fallback to MLFLOW vars (often the same for DagsHub)
  USER=${DAGSHUB_USERNAME:-$MLFLOW_TRACKING_USERNAME}
@@ -20,9 +27,17 @@ fi

  echo "Pulling models from DVC..."
  # Pull only the necessary files for inference
- dvc pull models/random_forest_tfidf_gridsearch.pkl \
-     models/tfidf_vectorizer.pkl \
-     models/label_names.pkl
+ dvc pull models/random_forest_tfidf_gridsearch.pkl.dvc \
+     models/tfidf_vectorizer.pkl.dvc \
+     models/label_names.pkl.dvc
+
+ echo "Starting FastAPI application in background..."
+ uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 &
+
+ # Wait for API to start
+ echo "Waiting for API to start..."
+ sleep 10

- echo "Starting FastAPI application..."
- uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 7860
+ echo "Starting Streamlit application..."
+ export API_BASE_URL="http://localhost:8000"
+ streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py --server.port 7860 --server.address 0.0.0.0
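
The fixed sleep 10 is the simplest way to let uvicorn come up before Streamlit starts. If that wait ever proves flaky, a possible alternative is to poll until the API answers; the sketch below assumes FastAPI's default /docs route is enabled (swap in a health endpoint if the app defines one):

    # Poll up to 30 seconds for the background API instead of sleeping blindly.
    for i in $(seq 1 30); do
        if curl -fsS "http://localhost:8000/docs" > /dev/null; then
            echo "API is up after ${i}s"
            break
        fi
        echo "Waiting for API... (${i}s)"
        sleep 1
    done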