Re: Scalable ML course error on Lab Setup (Communi...

iago_gonzalez · ‎07-05-2023

After reviewing the DBAcademyHelper code, I have seen that the problem is that the Community Edition does not have the following features:
-Feature Store
-MLflow Model Registry
-MLflow Endpoints

I have read that the reason is that the Community Edition does not offer tools for production. I think it would be a good idea to include these features in a limited form (for example, having the Feature Store, Model Registry, and Endpoints reset in the Community Edition after several hours), so that no one can use Databricks for free in production, but those of us who want to take the courses can still use it.

It is possible to modify the setup code to do some exercises, but I don't recommend it because it only allows you to do 1 or 2 exercises (the ones that only use MLflow Experiments); to complete the rest you would need to have Model Registry available.

---

If someone is interested in the setup code, these are the modifications that I made in "Includes/Classroom-Setup".

class CommunityEditionDBAcademyHelper(DBAcademyHelper):
    """DBAcademyHelper whose reset routines tolerate missing ML services.

    The Feature Store, MLflow Endpoints and MLflow Model Registry cleanup
    steps are treated as optional: when one of them raises, a warning naming
    the affected service is printed and the reset carries on with the
    remaining steps instead of aborting.
    """

    # (WorkspaceCleaner method name, service named in the warning), listed in
    # the order in which the steps must run.
    _OPTIONAL_ML_CLEANUPS = (
        ("_drop_feature_store_tables", "Feature Store"),
        ("_cleanup_mlflow_endpoints", "MLflow Endpoints"),
        ("_cleanup_mlflow_models", "MLflow Model Registry"),
    )

    @classmethod
    def _cleanup_ml_assets(cls, wc, lesson_only: bool) -> bool:
        """Run the ML cleanup steps, downgrading optional failures to warnings.

        Args:
            wc: The WorkspaceCleaner that performs the individual steps.
            lesson_only: Forwarded unchanged to every cleanup step.

        Returns:
            True when at least one step returned a truthy value.
        """
        status = False
        for method_name, service in cls._OPTIONAL_ML_CLEANUPS:
            try:
                # The attribute lookup stays inside the try so that a cleaner
                # lacking the method is reported like any other failure.
                step = getattr(wc, method_name)
                status = bool(step(lesson_only=lesson_only)) or status
            except Exception:
                # Deliberately not a bare except: KeyboardInterrupt and
                # SystemExit must still be able to stop the notebook.
                print(f"WARNING: {service} not available!")

        # Experiment cleanup is not optional, so its errors propagate.
        status = bool(wc._cleanup_experiments(lesson_only=lesson_only)) or status
        return status

    def cleanup(self, validate_datasets: bool = True) -> None:
        """Reset the environment of the current lesson.

        Args:
            validate_datasets: When True, validate the datasets (fail-fast)
                once the cleanup steps have run.
        """
        from dbacademy.dbhelper.dataset_manager_class import DatasetManager
        from dbacademy.dbhelper.workspace_cleaner_class import WorkspaceCleaner

        wc = WorkspaceCleaner(self)
        status = False
        if self.lesson_config.name is None:
            print("Resetting the learning environment:")
        else:
            print(f"Resetting the learning environment ({self.lesson_config.name}):")

        dbgems.spark.catalog.clearCache()
        status = wc._stop_all_streams() or status

        if self.lesson_config.enable_ml_support:
            status = self._cleanup_ml_assets(wc, lesson_only=True) or status

        status = wc._drop_catalog() or status
        status = wc._drop_schema() or status

        # Always last to remove DB files that are not removed by sql-drop operations.
        status = wc._cleanup_working_dir() or status

        if not status:
            print("| No action taken")

        if validate_datasets:
            DatasetManager.from_dbacademy_helper(self).validate_datasets(fail_fast=True)

    def reset_lesson(self) -> None:
        """Run `cleanup` for the current lesson without validating datasets."""
        return self.cleanup(validate_datasets=False)

    def reset_learning_environment(self) -> None:
        """Reset the environment for all lessons and report the elapsed time."""
        from dbacademy.dbhelper.workspace_cleaner_class import WorkspaceCleaner

        wc = WorkspaceCleaner(self)
        print("Resetting the learning environment for all lessons:")

        start = dbgems.clock_start()

        dbgems.spark.catalog.clearCache()
        wc._stop_all_streams()

        if self.lesson_config.enable_ml_support:
            # The aggregated status is irrelevant here; success is always reported.
            self._cleanup_ml_assets(wc, lesson_only=False)

        wc._reset_databases()
        wc._reset_datasets()
        wc._reset_working_dir()

        print(f"| the learning environment was successfully reset {dbgems.clock_stopped(start)}.")


import re

# Build the helper from the course and lesson configuration objects; both
# names are defined outside this snippet.
DA = CommunityEditionDBAcademyHelper(course_config, lesson_config)
# Clean up the current lesson without validating the datasets.
DA.reset_lesson()
# NOTE(review): init() is not overridden above, so it runs whatever the
# DBAcademyHelper base class implements — confirm against the library.
DA.init()

# NOTE(review): presumably configures MLflow for job runs; not defined in this
# snippet — confirm against the library.
DA.init_mlflow_as_job()

# NOTE(review): presumably finalizes the setup; not defined in this snippet.
DA.conclude_setup()

View solution in original post