Module functions.metatables.main
Classes
class MetaTable2 (df: pandas.core.frame.DataFrame, meta: pyreadstat._readstat_parser.metadata_container, project_number: int, project_name: str, columns: dict[str, Column] = _Nothing.NOTHING, groups: dict[str, Group] = _Nothing.NOTHING, weights: dict[str, Weight] = _Nothing.NOTHING, year: str = '2025', output_path: str = './output/', projects_path: str = './gfs_projects/')
-
Creates a MetaTable object that makes data wrangling of files read with pyreadstat easy.
Args
df
:pd.DataFrame
- DataFrame read with pyreadstat
meta
:pyreadstat._readstat_parser.metadata_container
- pyreadstat metadata
project_number
:int
- The project number. Is used as a folder name for the generated files
project_name
:str
- The name of the project. Is used as a folder name for the generated files
columns
:dict[str, Column]
- metadata of all dataframe columns. Is part of gfs-meta
groups
:dict[str, Group]
- metadata of all column groups. Is part of gfs-meta
year
:str
- current year. Is used to choose the save folder
output_path
:str
, optional- Path of the output. Defaults to './output/'.
projects_path
:str
, optional- Path of the git projects folder. Defaults to './gfs_projects/'.
Method generated by attrs for class MetaTable2.
Class variables
var columns : dict[str, Column]
var df : pandas.core.frame.DataFrame
var groups : dict[str, Group]
var meta : pyreadstat._readstat_parser.metadata_container
var output_path : str
var project_name : str
var project_number : int
var projects_path : str
var weights : dict[str, Weight]
var year : str
Methods
def add_column_to_group(self, column: str, group: str)
-
Adds column to a group.
Args
column
:str
- Name of the column
group
:str
- Group to add the column
def add_to_group(self, column: str)
-
Adds info to the group that the column belongs to it.
Args
column
:str
- Name of the column
def calculate_weight(self, column: str, target_values: dict, return_df: bool = False)
-
Calculates the weights for a given weight column based on the target values. You need to create the weight before you can calculate it.
Args
column
:str
- Name of the weight column
target_values
:dict
- Dictionary with the target values for each intersection. This is a nested dictionary and it accepts absolute values. Can be like {1: {1: 20, 2: 25}, 2: {1: 40, 2: 50}}
return_df
:bool
- If True it will return the weighted dataframe. Defaults to False.
def check_duplicates(self, row: np.array) ‑> bool
-
function to check for duplicates in a row
Args
row
:np.array
- row to apply the function to
Returns
bool
- if duplicates exist returns True, else False
def check_missing_columns(self, expected_columns: list)
def check_sav_prebreak(self, method: str = 'CATI', Interviewer_Column: str = 'ENQ2', Interview_Duration_Column: str = 'DURINT', Date_Column: str = 'DATE') ‑> None
-
checks and prints multiple key features of a sav file from nebu
Args
method
:str
, optional- string to indicate the method. Defaults to "CATI". possible values are "CATI" or "OTHER"
Interviewer_Column
:str
, optional- Interviewer Code Column. Defaults to "ENQ2".
Interview_Duration_Column
:str
, optional- Interview Duration Column. Defaults to "DURINT".
Date_Column
:str
, optional- Interview Date Column. Defaults to "DATE".
def copy_column(self, old_column: str, new_column: str, same_group: bool = True, add_to_group: bool = True)
-
Creates a copy of a column and gives it a new name.
Args
old_column
:str
- Name of the column to be copied
new_column
:str
- Name of the new column
same_group
:bool
, optional- If True the column will be added to the same group as the copied column. Defaults to True.
def copy_group(self, old_group: str, new_group: str)
-
Creates a copy of a column group and gives it a new name.
Args
old_group
:str
- Name of the group to be copied
new_group
:str
- Name of the new group
def create_column(self, column: str, label: str, value_labels: Union[dict[int, str], str, ForwardRef(None)] = None, measure: str = 'scale')
-
Creates a new column in the MetaTable and the DataFrame.
Args
column
:str
- Name of the column
label
:str
- Label of the column
value_labels
:Union[dict[int, str], str]
, optional- Value labels of the column. Defaults to None.
measure
:str
, optional- Measure of the column. Defaults to 'scale'.
def create_group(self, group_name: str, columns: list[str], kind: str, measure: str = 'auto', lfm: str = 'yes', mean: str = 'auto', group_label: str = 'auto', group_value_labels: Union[dict[int, str], str] = 'auto', missing_values: Union[list[float], str] = 'auto')
-
Creates a new group in the MetaTable.
Args
group_name
:str
- Name of the group
columns
:list[str]
- List of columns that belong to the group
kind
:str
- Kind of the group. Can be 'multi' or 'batch'
measure
:str
, optional- Measure of the group. Can be 'string', 'nominal', 'scale' or 'ordinal'. Defaults to 'auto'.
lfm
:str
, optional- Decides if the value labels of the group should be used for all columns in the group. Can be 'yes' or 'no'. Defaults to 'yes'.
mean
:str
, optional- Decides if there is a useful mean value for a group of columns. Can be 'yes' or 'no'. Defaults to 'auto'.
group_label
:str
, optional- Label of the group. Defaults to 'auto'.
group_value_labels
:Union[dict[int, str], str]
, optional- Value labels of the group. Defaults to 'auto'.
missing_values
:Union[list[float], str]
, optional- List of missing values of the group. Defaults to 'auto'.
def create_weight(self, name: str, columns: list[str])
-
Creates a new weight based on the given columns.
Args
name
:str
- Name of the new weight
columns
:list[str]
- List of columns that should be used to calculate the weight.
def delete_column(self, column: str)
-
Deletes a column and the information about it from its group.
Args
column
:str
- Name of the column
def delete_group(self, group: str)
-
Deletes a column group and the information about it in every column.
Args
group
:str
- Name of the group
def encode(self, old_column: str, new_column: str, values: Optional[dict[str, int]] = None)
-
Encodes a column based on a dictionary with the new values.
Args
old_column
:str
- Name of the column
new_column
:str
- Name of the new column
values
:Optional[dict[str, int]]
- A dictionary that maps the old values to the new values. Can be {"yes": 1, "no": 2}. This will replace all "yes" values with 1 and all "no" values with 2.
def export_coding_excel(self, column_lists: list[list[str]], filename: str = 'toCode', darker_columns: list = None, use_value_labels: bool = False) ‑> None
-
Exports a .xlsx-file with the given columns and their value labels. This is mostly used for coding open questions.
Args
column_lists
:list[list[str]]
- List of lists with the columns that should be exported. Can be [['CODERESP'], ['F1@', 'F1_01', 'F1_02', 'F1_03']]. All columns of every sublist will have the same background color.
filename
:str
- File name of the .xlsx file. Defaults to 'toCode'.
darker_columns
:list
- List of columns that should have a darker background color. Defaults to None.
use_value_labels
:bool
- If True it will display the value labels instead of the codes. Defaults to False.
def export_config(self, export_df: bool = True, gfs_config_name: str = 'gfs-config')
-
Exports an excel-file that makes changing the meta data very simple.
Args
export_df
:bool
- If True it will also export the data. Defaults to True.
def export_data(self, file_name: str = 'fertig') ‑> None
-
Exports a .SAV-file and a gfs-meta JSON-file.
Args
file_name
:str
- File name of the .sav file
def filter_label(self, column: str, filter_label: str)
-
Updates the filter_label of a column.
Args
column
:str
- Name of the column
filter_label
:str
- New filter_label of the column. This label adds information about the filter that was used for that question in the questionnaire.
def get_intersection_counts(self, categorical_columns: list[str])
-
Get the count for all combinations of the given columns.
Args
categorical_columns
:list[str]
- List of columns that should be used to calculate the intersection counts.
def group_filter_label(self, group: str, filter_label: str)
-
Updates the filter_label of a group of columns.
Args
group
:str
- Name of the group
filter_label
:str
- New filter_label of the group. This label adds information about the filter that was used for that question in the questionnaire.
def group_has_mean(self, group: str, mean: str)
-
Updates if there is a useful mean value for a group of columns.
Args
group
:str
- Name of the group
mean
:str
- New mean state of the group. Should be "yes" or "no"
def group_kind(self, group: str, kind: str)
-
Updates the kind of a group of columns.
Args
group
:str
- Name of the group
kind
:str
- New kind of the group. Should be "multi", "single" or "batch"
def group_label(self, group: str, text: str, verbose: bool = True)
-
Updates the group_label of a group of kind = "batch" or "multi".
Args
group
:str
- Name of the group of columns
text
:str
- New text of the group_label
verbose
:bool
- Prints warnings if True. Defaults to True.
def group_lfm(self, group: str, lfm: str)
-
Updates the lfm (label from group) of a group of columns.
Args
group
:str
- Name of the group
lfm
:str
- New lfm of the group. Should be "yes" or "no"
def group_measure(self, group: str, measure: str)
-
Updates the measure of a group of columns.
Args
group
:str
- Name of the group
measure
:str
- New measure of the group. Should be "nominal", "string", "scale" or "ordinal"
def group_missing_values(self, group: str, missing_values: list[float])
-
Updates the missing values of a group of columns.
Args
group
:str
- Name of the group
missing_values
:list[float]
- New missing values of the group.
def group_value_labels(self, group: str, value_labels: Union[dict[int, str], str], keep_untouched_codes: bool = False)
-
Updates the value labels of a group of columns.
Args
group
:str
- Name of the group
value_labels
:Union[dict[int, str], str]
- A dictionary with new labels {1 : "label for code 1", 2: "label for code 2"} or the column name with the labels to be used "column_name"
keep_untouched_codes
:bool
, optional- This will keep the old labels of the column and just add the new ones instead of replacing all labels. Defaults to False.
def has_mean(self, column: str, mean: str)
-
Updates if there is a useful mean value for a column.
Args
column
:str
- Name of the column
mean
:str
- New mean state of the column. Should be "yes" or "no"
def import_config(self, gfs_config_name: str = 'gfs-config') ‑> None
-
Imports the gfs-config excel-file and updates the MetaTable according to the changes made in excel.
Args
gfs_config_name
:str
, optional- name of config if it should not be default or multiple configs are used. Defaults to "gfs-config".
Raises
FileNotFoundError
- if a config file is not found
ValueError
- description
def item_label(self, column: str, text: str, verbose: bool = True)
-
Updates the item_label of a variable of kind = "batch".
Args
column
:str
- Name of the column
text
:str
- New text of the item_label
verbose
:bool
- Prints warnings if True. Defaults to True.
def kind(self, column: str, kind: str)
-
Updates the kind of a column.
Args
column
:str
- Name of the column
kind
:str
- New kind of the column. Should be "multi", "single" or "batch"
def make_quota_check(self, columns: list[str], filename_quotas: str = 'cross_tab', filename_quota_check: str = 'quota_check', calc_quota_difference: bool = False, save_quota_check: bool = False)
-
Calculates the difference between a crosstab and a quota
Args
columns
:list[str]
- list of dataframe columns in crosstab
filename_quotas
:str
, optional- name of the excel file where the crosstab is
filename_quota_check
:str
, optional- name of the excel file where the difference in quotas is saved
calc_quota_difference
:bool
, optional- boolean to indicate if difference in quota is calculated
save_quota_check
:bool
, optional- boolean to indicate if difference in quota is saved in an excel file
def measure(self, column: str, measure: str)
-
Updates the measure of a column.
Args
column
:str
- Name of the column
measure
:str
- New measure of the column. Should be "nominal", "string", "scale" or "ordinal"
def merge_open_questions(self, df_open_questions: pandas.core.frame.DataFrame, columns: list, code_list: dict, group_name: str, group_label: str = '', merge_Id: str = 'CODERESP', group_kind='multi', measure: str = 'auto', check_for_duplicates: bool = True) ‑> None
-
merges open questions with the metatable dataframe
Args
df_open_questions
:pd.DataFrame
- open question dataframe
columns
:list
- list of columns to merge (normally a group)
code_list
:dict
- dictionary with the new code list (used for group value labels)
group_name
:str
- group name to use
group_label
:str
, optional- Label for the group. Defaults to "".
merge_Id
:str
- id to merge columns on, defaults to CODERESP
group_kind
:str
, optional- kind of group. Defaults to 'multi'.
measure
:str
, optional- measure of the group. Defaults to 'auto'.
check_for_duplicates
:bool
, optional- check duplicates overrule parameter, duplicates are not checked if set to False. Defaults to True.
def merge_semiopen_questions(self, df_semiopen_questions: pandas.core.frame.DataFrame, columns: list, code_list: dict, group_name: str, merge_Id: str = 'CODERESP', check_for_duplicates: bool = True) ‑> None
-
merges semi open questions with the metatable dataframe
Args
df_semiopen_questions
:pd.DataFrame
- semiopen questions dataframe
columns
:list
- list of columns to merge (normally a group)
code_list
:dict
- dictionary with the new code list (used for group value labels)
group_name
:str
- group name to use
merge_Id
:str
- id to merge columns on, defaults to CODERESP
check_for_duplicates
:bool
, optional- check duplicates overrule parameter, duplicates are not checked if set to False. Defaults to True.
def missing_values(self, column: str, missing_values: list[float])
-
Updates the missing values of a column.
Args
column
:str
- Name of the column
missing_values
:list[float]
- New missing values of the column.
def move_column(self, column: str, end: bool = True)
-
Moves a column to the beginning or the end of the MetaTable
Args
column
:str
- Column to be moved
end
:bool
, optional- If end is True the column is moved to the end, if end is False the column is moved to the beginning. Defaults to True.
def move_columns(self, column_order: list)
-
Moves columns based on the desired order in the MetaTable.
Args
column_order
:list
- The desired column order.
def randomise_divers_gender(self, gender_column='S11', divers_values: list = [3], seed: int = 12345)
-
randomises the divers gender value to either 1 or 2 with a chance of 50/50, asserts that 1 and 2 are male and female values
Args
gender_column
:str
, optional- column name which has the values for gender. Defaults to "S11".
divers_values
:list[int]
, optional- values which correspond to divers labels; if multiple are given, the randomisation is executed for each label sequentially. Defaults to [3].
seed
:int
, optional- randomised seed, should normally not be changed. Defaults to 12345.
def recode(self, old_column: str, new_column: str, values: dict[int, typing.Any], keep_untouched_codes: bool = True)
-
Recodes a column based on a dictionary with the new values.
Args
old_column
:str
- Name of the column
new_column
:str
- Name of the new column
values
:dict[int, Any]
- A dictionary with the new values and the old values that should be replaced. Can be {1: range(1, 20), 2: [20, 21], 3: 22}. This will replace all values from 1 to 19 with 1, 20 and 21 with 2 and 22 with 3.
keep_untouched_codes
:bool
- This will keep the old labels of the column and just add the new ones instead of replacing all labels. Defaults to True.
def recode_group(self, old_group: str, new_group: str, values: dict[int, typing.Any], keep_untouched_codes: bool = True)
-
Recodes a group of columns based on a dictionary with the new values.
Args
old_group
:str
- Name of the group
new_group
:str
- Name of the new group
values
:dict[int, Any]
- A dictionary with the new values and the old values that should be replaced. Can be {1: range(1, 20), 2: [20, 21], 3: 22}. This will replace all values from 1 to 19 with 1, 20 and 21 with 2 and 22 with 3.
keep_untouched_codes
:bool
- This will keep the old labels of the column and just add the new ones instead of replacing all labels. Defaults to True.
def remove_from_group(self, column: str)
-
Removes a column from a column group.
Args
column
:str
- Name of the column
def remove_speeders(self, speeder_value: float = None, Interview_Duration_Column: str = 'DURINT') ‑> None
-
Remove speeder rows from the DataFrame where interview duration is below the calculated speeder threshold.
Args
speeder_value
:float
- The precalculated speeder threshold value. If not provided, it will be calculated using _calculate_speeder_value.
Interview_Duration_Column
:str
- Name of the column containing interview durations. Default is "DURINT".
def rename_column(self, name: str, new_name: str)
-
Renames a column.
Args
name
:str
- Old name of the column
new_name
:str
- New name of the column
def rename_group(self, group: str, new_group_name: str)
-
renames a group
Args
group
:str
- group to rename
new_group_name
:str
- new group name
def select_columns(self, columns: list)
-
Selects columns and removes the others
Args
columns
:list
- Names of the columns to select
def show_column_info(self, column: str, show_objects: bool = False)
-
Shows info about the value labels and variable label of the given column.
Args
column
:str
- Column to be shown
show_objects
:bool
- if True it prints lists value_labels
def show_column_meta(self, column: str)
-
Shows info about the meta data of the given column.
Args
column
:str
- Column to be shown
def show_crosstab(self, columns: list[str], save_crosstab: bool = False, cross_tab_name: str = 'cross_tab', drop_na: bool = False, show_margins: bool = True)
-
Creates and shows a crosstab with a set of row and a set of column breaks
Args
columns
:list[str]
- list of dataframe columns in crosstab
save_crosstab
:bool
, optional- boolean to indicate if crosstab is saved in an excel file
cross_tab_name
:str
, optional- name of the excel file
drop_na
:bool
- if True it doesn't show rows and columns if all of their values are zero
show_margins
:bool
- Shows the total of rows and columns if True
def show_group_info(self, group: str)
-
Shows info about the given group.
Args
group
:str
- The name of a group of columns
def show_group_meta(self, group: str)
-
Shows info about the meta data of the given group.
Args
group
:str
- The name of a group of columns
def single_label(self, column: str, text: str, verbose: bool = True)
-
Updates the label of a variable of kind = "single".
Args
column
:str
- Name of the column
text
:str
- New text of the label
verbose
:bool
- Prints warnings if True. Defaults to True.
def value_labels(self, column: str, value_labels: Union[dict[int, str], str], keep_untouched_codes: bool = False)
-
Updates the value labels of a column.
Args
column
:str
- Name of the column
value_labels
:Union[dict[int, str], str]
- A dictionary with new labels {1 : "label for code 1", 2: "label for code 2"} or the column name with the labels to be used "column_name"
keep_untouched_codes
:bool
, optional- This will keep the old labels of the column and just add the new ones instead of replacing all labels. Defaults to False.